diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..721f3a3f6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +chappage.txt +main_spec.aux +main_spec.idx +main_spec.ilg +main_spec.ind +main_spec.log +main_spec.out +main_spec.pdf +main_spec.toc diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..9948118dd --- /dev/null +++ b/.travis.yml @@ -0,0 +1,19 @@ +sudo: required +dist: trusty + +addons: + apt: + packages: + - texlive-latex-base + - texlive-latex-recommended + - texlive-latex-extra + - texlive-generic-recommended + - latex-xcolor + - texlive-fonts-recommended + - texlive-fonts-extra + +before_install: +install: +script: + - make LATEXOPT=-halt-on-error + - make clean diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..0a48e04d4 --- /dev/null +++ b/Makefile @@ -0,0 +1,19 @@ +TARGET = main_spec +SOURCES = $(shell find . -name '*.tex' -print) +FIGURES = $(shell find figures -name '*.pdf' -print) +EXAMPLES = $(shell find example_code -name '*.[c,f]*' -print) + +.PHONY: all +all: ${TARGET}.pdf + +${TARGET}.pdf: ${SOURCES} ${FIGURES} ${EXAMPLES} + pdflatex $(LATEXOPT) ${TARGET} + makeindex ${TARGET} + pdflatex $(LATEXOPT) ${TARGET} + makeindex ${TARGET} + pdflatex $(LATEXOPT) ${TARGET} + +.PHONY: clean +clean: # Remove LaTeX/makeindex outputs; list built by make, not shell braces (a bashism that /bin/sh may not expand) + rm -f $(addprefix ${TARGET}.,aux bbl blg dvi idx ilg ind log out pdf ps toc) chappage.txt + diff --git a/_deprecated_sources/C/Makefile b/_deprecated_sources/C/Makefile deleted file mode 100644 index f734dbde7..000000000 --- a/_deprecated_sources/C/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -SHELL = /bin/sh - -CC = oshcc -CFLAGS = -Wall -pedantic -DNDEBUG -O3 - -RUNCMD=oshrun -NPROCOPT=-np -NPROC=2 -RUNOPT=$(NPROCOPT) $(NPROC) - -C_TESTS = circ.c cshift.c helloworld.c pi.c reduce-max.c shmem_long_finc_only.c -C_EXES = $(C_TESTS:.c=) -EXES = $(C_EXES) - -all default: $(EXES) - -run-circ: circ - $(RUNCMD) $(RUNOPT) ./circ - -run-cshift: cshift - $(RUNCMD) $(RUNOPT) ./cshift - -run-helloworld: helloworld - 
$(RUNCMD) $(RUNOPT) ./helloworld - -run-pi: pi - $(RUNCMD) $(RUNOPT) ./pi - -run-reduce-max: reduce-max - $(RUNCMD) $(RUNOPT) ./reduce-max - -run-finc: shmem_long_finc_only - $(RUNCMD) $(RUNOPT) ./shmem_long_finc_only - -run: all run-circ run-cshift run-helloworld run-pi run-reduce-max run-finc - -clean: - rm -f $(EXES) diff --git a/_deprecated_sources/C/all2all.c b/_deprecated_sources/C/all2all.c deleted file mode 100644 index f4b57c990..000000000 --- a/_deprecated_sources/C/all2all.c +++ /dev/null @@ -1,200 +0,0 @@ -/* -** Required includes and definitions -*/ - -#include -#include -#include -#include -#include -#include - -/* -** LP64 types -*/ - -typedef signed char int8 ; -typedef unsigned char uint8 ; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long int64; -typedef unsigned long uint64; - -/* -** timing -*/ - -#include - -typedef struct { - double accum_wall, accum_cpus; - double start_wall, start_cpus; - time_t init_time; - char running; -} timer; - -void timer_clear (timer *t); -void timer_start (timer *t); -void timer_stop (timer *t); -void timer_report (timer *t, double *pwall, double *pcpus, int64 print); - -/* -** some masking macros -*/ - -#define _ZERO64 0uL -#define _maskl(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) << (64-(x)))) -#define _maskr(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) >> (64-(x)))) -#define _mask(x) (((x) < 64) ? 
_maskl(x) : _maskr(2*64 - (x))) - -/* -** PRNG -*/ - -#define _BR_RUNUP_ 128L -#define _BR_LG_TABSZ_ 7L -#define _BR_TABSZ_ (1L<<_BR_LG_TABSZ_) - -typedef struct { - uint64 hi, lo, ind; - uint64 tab[_BR_TABSZ_]; -} brand_t; - -#define _BR_64STEP_(H,L,A,B) {\ - uint64 x;\ - x = H ^ (H << A) ^ (L >> (64-A));\ - H = L | (x >> (B-64));\ - L = x << (128 - B);\ -} - -uint64 brand (brand_t *p); -void brand_init (brand_t *p, uint64 val); - -/* -** shmem functionality -*/ - -/* -** max size of tab (in wrds) -*/ - -#ifndef LG_NWRDS -#define LG_NWRDS 24L -#endif -#define NWRDS (1uL << LG_NWRDS) - -#include - -#define MY_GTHREAD shmem_my_pe() -#define GTHREADS shmem_n_pes() -#define TYPE uint64 - -TYPE *mpp_alloc (int64 nbytes); -void mpp_free (TYPE *ptr); -int64 mpp_accum_long (int64 val); -void do_sync_init (void); -int64 all2all (TYPE *dst, uint64 *src, int64 len, int64 nwrd); -void do_all2all (TYPE *tab, uint64 *loc, brand_t *br, int64 msize, - int64 tsize, int64 rep, int64 print); -void do_warmup (brand_t *br); - - -/* -** driver program -*/ - -int main (int argc, char *argv[]) - -{ - brand_t br; - int64 seed, arg, msize, tsize, rep; - TYPE *tab; - uint64 *loc; - - start_pes(0); - - if (argc < 5) { - if (MY_GTHREAD == 0) - fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt " - "[ms2 ts2 rc2 ..]\n", argv[0]); - goto DONE; - } - -/* -** alloc some shared space -** (checks for valid pointer and casts) -*/ - - tab = mpp_alloc (NWRDS * sizeof(uint64)); - -/* -** pointer to local space -*/ - - loc = &tab[0]; - - -/* -** init all local memory -*/ - - bzero ((void *)&loc[0], NWRDS * sizeof(uint64)); - - seed = atol (argv[1]); - if (MY_GTHREAD == 0) - printf ("base seed is %ld\n", seed); - seed += (uint64)MY_GTHREAD << 32; - -/* -** seed uniquely per PE -*/ - - brand_init (&br, seed); - - arg = 2; - - while (arg < argc) { - msize = atol (argv[arg++]); - if (arg >= argc) - break; - tsize = atol (argv[arg++]) * (1L << 20); - if (arg >= argc) - break; - rep = atol 
(argv[arg++]); - - if (MY_GTHREAD == 0) - printf ("tsize = %ldMB msize = %5ldB\n", tsize/(1L<<20), msize); - if (msize < sizeof(long)) { - if (MY_GTHREAD == 0) - printf ("msize must be > %ld B\n", (int64)sizeof(long)); - goto DONE; - } - if (tsize > (NWRDS * sizeof(long))) { - if (MY_GTHREAD == 0) - printf ("tsize must be < %ld MiB\n", - (int64)(NWRDS * sizeof(long)) / (1uL<<20)); - goto DONE; - } - -/* -** exits on error -*/ - - do_all2all (tab, loc, &br, msize, tsize, rep, 1); - - if (MY_GTHREAD == 0) - printf ("\n"); - } - -/* -** free up the shared memory -*/ - - mpp_free (tab); - - DONE: - shmem_barrier_all(); - return 0; -} diff --git a/_deprecated_sources/C/all2all_main.c b/_deprecated_sources/C/all2all_main.c deleted file mode 100644 index 0e0a0ad6a..000000000 --- a/_deprecated_sources/C/all2all_main.c +++ /dev/null @@ -1,276 +0,0 @@ -/* CVS info */ -/* $RCSfile: all2all_main.c,v $ */ -/* -* Purpose: all2all.c copies data from one half of a table to the other -* half of the table. -* -* -* Date Description -* -* all2all has been modified to automatically compare cksum results -* for 128 processors at run time and to print an error message -* if there is a discrepancy. -* In the future additional error checking for any number of processors -* will be done. -* added memset(tab,0,tsize) -* -* Preprocessor DEFINED Variables: -* 1. This benchmark will automatically verify checksums unless CHECKOFF is -* defined in the makefile flags. To turn off the automated check specify -* -DCHECKOFF in the makefile flags and recompile. -* (MUST define CHECKOFF if not using 128 processors.) -* 2. If additional timing info is needed for debugging specify -DPTIMES in -* the makfile flags. 
-*/ - -#include -#include "all2all.h" - -int64 SELF, SIZE; - -#if 0 -int64 known_v[] = { - 0x889d1f6f6b165117, - 0xc2597eee7a77503b, - 0x9fde67a85fec3140, - 0x98218560b0e2fcad, - 0x77970e91ec2ae92f, - 0xd7c257a76e652480, - 0xfae8fc3473e44bd7, - 0xae70524b190b97d1, - 0xbd3481e6d55c2587, - 0x92b1e34c9a63c162, - 0xd53483207d373375, - 0x818b5ae39e15de0c, - 0xa10c2c69b3441650, - 0x3213b203ef570cfe, - 0x953cacafbc6694af, - 0x0435c6359cfeac6a, - 0x0107162b374ac090, - 0x3b4579d543eb131e, - 0x1f46dbcd8e23ca22, - 0x4f99bd5b1c45bff2, - 0x69872eca2dd09002, - 0x5a10168c91da8c2e, - 0xfb7842751192f1bf, - 0x42d182c4447097fe, - 0xacdb47e7a6c94a44, - 0x91fb985dbdd6e93b, - 0x4796404dd92f2c3a, - 0xcda282a270d3610f, - 0x29d786ca8abdaf09, - 0x3f9af62d5a02bdc6, - 0x513eb2b11ab80a05, - 0x59a32e0cc53f2c3d, - 0x5b22688cc292ee8c, - 0xd7076df7f4c3b35b, - 0x3dcf8e920a889b72, - 0x6cf0fe53b376b881 -}; - -#endif -int gv = 0; - -/* Set up for one iteration only.*/ -int64 ckv[3] = { - 0x156a0e1af0914226, - 0xa70ebc57a39fd98d, - 0x1513f274d76734c6, -}; - -uint64 do_cksum (uint64 *arr, int64 len) -{ - int64 i, cksum; - - // compute src cksum - for (i = cksum = 0; i < len; i++) - cksum += arr[i]; - return accum_long (cksum); -} - -int main (int argc, char *argv[]) -{ - static char cvs_info[] = "BMkGRP $Date: $ $Revision: $ $RCSfile: all2all_main.c,v $ $Name: $"; - - int itr; - int idx; - brand_t br; - timer t, t0, t1; - double nsec; - - double total_time = 0.0; - - int status = 0; - - int64 i, seed, arg, msize, tsize, len, oldsize=0, rep, cksum; - uint64 *tab=NULL; - - start_pes(0); - SELF=_my_pe(); - SIZE=_n_pes(); - - if (argc < 5) { - if (SELF == 0) - fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt " - "[ms2 ts2 rc2 ..]\n", argv[0]); - status = 1; - goto DONE; - } - seed = atol (argv[1]); - if (SELF == 0) - printf ("base seed is %ld\n", seed); - seed += SELF << 32; - brand_init (&br, seed); // seed uniquely per PE - - arg = 2; - - while (arg < argc) { - - - msize = atol 
(argv[arg++]); if (arg >= argc) break; - /* Table size * 1 million. */ - tsize = atol (argv[arg++]) * (1L << 20); if (arg >= argc) break; - //rep = atol (argv[arg++]); - rep = 1; - arg++; - - if (SELF == 0) printf ("tsize = %ldMB msize = %dB\n", - tsize/(1L<<20), msize); - if (msize < sizeof(long)) { - if (SELF == 0) printf ("msize too short!\n"); - //status = 1; - goto DONE; - } - //itr=0; - - idx = 0; - - switch(SIZE){ - case 2: - idx = 0; - break; - case 4: - idx = 1; - break; - case 8: - idx = 2; - break; - default: - fprintf(stderr,"warning, check sum for (%d) pes not supported.\n", - SIZE); - } - - while (rep-- > 0) { - - /* START TIMING */ - //timer_clear (&t0); - //timer_clear (&t1); - //timer_start (&t0); - - if ((tab == NULL) || (tsize > oldsize)) { - if (tab != NULL) { - dram_shfree (tab); - oldsize = 0; - } - if (SELF == 0) printf ("trying dram_shmalloc of %ld bytes\n", tsize); - tab = (uint64 *) dram_shmalloc (tsize); - - if (tab == NULL) { - if (SELF == 0) printf ("dram_shmalloc failed!\n"); - status = 1; - goto DONE; - - } - oldsize = tsize; - } - - // length in words - len = tsize / sizeof(uint64); - - // important to init table - // to ensure cksum consistency on different platforms - memset(tab,0,tsize); - - for (i = 0; i < len; i+=64){ - tab[i] = brand(&br); - } - - // we'll have destination/source arrays each of half size - len /= 2; - - //timer_stop (&t0); - // source checksum - cksum = do_cksum (&tab[len], len); - if (SELF == 0) printf ("cksum is %016lx\n", cksum); - if (SELF == 0){ - //if(cksum!=ckv[itr++]){ - /* Set up for one iteration only. 
*/ - if(cksum!=ckv[idx]){ - printf ("cksum %016lx != ckv[%d] %016x\n",cksum,idx,ckv[idx]); - gexit(1); - } - - } - - //timer_start (&t1); - len = do_all2all (&tab[0], &tab[len], len, msize/sizeof(uint64)); - - shmem_barrier_all(); - - //timer_stop (&t1); - /* END TIMING */ -#if 0 - - // dest checksum - i = do_cksum (&tab[0], len); - if (i != cksum) { - printf ("PE %4ld ERROR: %016lx != %016lx\n", SIZE, i, cksum); - status = 1; - goto DONE; - } - -#ifndef CHECKOFF - if (i != known_v[gv]) { - printf ("CHECKSUM PE %4ld ERROR: %016lx != %016lx\n", SIZE, i, known_v[gv]); - status = 1; - goto DONE; - } - gv++; -#endif - - - //t.accum_wall = t0.accum_wall + t1.accum_wall; - //t.accum_cpus = t0.accum_cpus + t1.accum_cpus; - - - /*if (SELF == 0) { - -#ifdef PTIMES - printf ("%8.3f %8.3f\n", t0.accum_wall , t1.accum_wall); - printf ("%8.3f %8.3f\n", t0.accum_cpus , t1.accum_cpus); -#endif - printf ("wall reports %8.3f secs cpus report %8.3f secs\n", - t.accum_wall, t.accum_cpus); - nsec = MAX(t.accum_wall, t.accum_cpus); - total_time += nsec; - if (nsec > 0) - printf ("%8.3f MB/sec with %ld bytes transfers\n", - len*sizeof(uint64)/(double)(1L<<20)/nsec, msize); - }*/ -#endif - } - //if (SELF == 0) - //printf ("\n"); - } - //if (SELF == 0) - //{ - //printf ("total time = %14.9f\n", total_time); - - //} - - DONE: - shmem_barrier_all(); - return status; -} - - diff --git a/_deprecated_sources/C/all2all_subs.c b/_deprecated_sources/C/all2all_subs.c deleted file mode 100644 index 362185099..000000000 --- a/_deprecated_sources/C/all2all_subs.c +++ /dev/null @@ -1,56 +0,0 @@ -#include "bench.h" -#include - -#define PERM(ME,TOT,ITER) ((ME)^(ITER)) // ok if 2^n pes - -#define MAX(A,B) (((A)>(B)) ? (A) : (B)) -#define MIN(A,B) (((A)<(B)) ? 
(A) : (B)) - -int64 do_all2all (uint64 *dst, uint64 *src, int64 len, int64 nwrd); -int64 accum_long (int64 val); - -extern int64 SELF, SIZE; - -/* returns words sent per PE */ - -int64 do_all2all (uint64 *dst, uint64 *src, int64 len, int64 nwrd) -{ -static char cvs_info[] = "BMKGRP $Date: $ $Revision: $ $RCSfile: all2all.c,v $ $Name: $"; - - int64 i, j, pe; - - len = len - (len % (nwrd * SIZE)); // force even multiple - for (i = 0; i < len; i+=SIZE*nwrd) { - shmem_barrier_all(); - for (j = 0; j < SIZE; j++) { - pe = PERM(SELF,SIZE,j); - /* shmem_put (&dst[i + SELF*nwrd], &src[i + pe*nwrd], nwrd, pe);*/ - shmem_put64 (&dst[i + SELF*nwrd], &src[i + pe*nwrd], nwrd, pe); - } - } - return len; -} - -int64 accum_long (int64 val) - -{ - int64 i; - static int64 target, source, init=0; - static int64 Sync[_SHMEM_REDUCE_SYNC_SIZE]; - static int64 Work[2 + _SHMEM_REDUCE_MIN_WRKDATA_SIZE]; - - if (! init) { - /* need to initialize Sync first time around */ - for(i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i++) - Sync[i] = _SHMEM_SYNC_VALUE; - init = 1; - } - source = val; - shmem_barrier_all(); - - shmem_long_sum_to_all (&target, &source, 1, 0, 0, SIZE, Work, Sync); - - shmem_barrier_all(); - return target; -} - diff --git a/_deprecated_sources/C/bench.h b/_deprecated_sources/C/bench.h deleted file mode 100644 index 499fd09d3..000000000 --- a/_deprecated_sources/C/bench.h +++ /dev/null @@ -1,281 +0,0 @@ -#include -#include -#include - - -typedef signed char int8 ; -typedef unsigned char uint8 ; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long int64; -typedef unsigned long uint64; - -/* timing */ - -#include -#include - -/* wall-clock time */ - -static double wall(void) - -{ - struct timeval tp; - - gettimeofday (&tp, NULL); - return - tp.tv_sec + tp.tv_usec/(double)1.0e6; -} - -#include - -/* cpu + system time */ - -static double cpus(void) -{ - struct rusage ru; - - getrusage(RUSAGE_SELF,&ru); - return - 
(ru.ru_utime.tv_sec + ru.ru_stime.tv_sec) + - (ru.ru_utime.tv_usec + ru.ru_stime.tv_usec)/(double)1.0e6; -} - -typedef struct { - double accum_wall, accum_cpus; - double start_wall, start_cpus; - time_t init_time; - char running; -} timer; - -static void timer_clear (timer *t) -{ - t->accum_wall = t->accum_cpus = 0; - t->start_wall = t->start_cpus = 0; - t->running = 0; -} - -static void timer_start (timer *t) -{ - t->start_wall = wall(); - t->start_cpus = cpus(); - t->running = 1; -} - -static void timer_stop (timer *t) -{ - if (t->running == 0) - return; - t->accum_cpus += cpus() - t->start_cpus; - t->accum_wall += wall() - t->start_wall; - t->running = 0; -} - -static void timer_report (timer *t, double *pwall, double *pcpus, - int64 print) -{ - double w, c; - - w = t->accum_wall; - c = t->accum_cpus; - - if (t->running) { - c += cpus() - t->start_cpus; - w += wall() - t->start_wall; - } - if (print) { - printf ("%7.3f secs of wall clock time\n", w); - printf ("%7.3f secs of cpu and system time\n", c); - } - - if (pwall) *pwall = w; - if (pcpus) *pcpus = c; -} - - -/* some masking macros */ - -#define _ZERO64 0uL -#define _maskl(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) << (64-(x)))) -#define _maskr(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) >> (64-(x)))) -#define _mask(x) (((x) < 64) ? 
_maskl(x) : _maskr(2*64 - (x))) - -/* PRNG */ - -#define _BR_RUNUP_ 128L -#define _BR_LG_TABSZ_ 7L -#define _BR_TABSZ_ (1L<<_BR_LG_TABSZ_) - -typedef struct { - uint64 hi, lo, ind; - uint64 tab[_BR_TABSZ_]; -} brand_t; - -#define _BR_64STEP_(H,L,A,B) {\ - uint64 x;\ - x = H ^ (H << A) ^ (L >> (64-A));\ - H = L | (x >> (B-64));\ - L = x << (128 - B);\ -} - -static uint64 brand (brand_t *p) { - uint64 hi=p->hi, lo=p->lo, i=p->ind, ret; - - ret = p->tab[i]; - - _BR_64STEP_(hi,lo,45,118); - - p->tab[i] = ret + hi; - - p->hi = hi; - p->lo = lo; - p->ind = hi & _maskr(_BR_LG_TABSZ_); - - return ret; -} - -static void brand_init (brand_t *p, uint64 val) - -{ - int64 i; - uint64 hi, lo; - - hi = 0x9ccae22ed2c6e578uL ^ val; - lo = 0xce4db5d70739bd22uL & _maskl(118-64); - - for (i = 0; i < 64; i++) - _BR_64STEP_(hi,lo,33,118); - - for (i = 0; i < _BR_TABSZ_; i++) { - _BR_64STEP_(hi,lo,33,118); - p->tab[i] = hi; - } - p->ind = _BR_TABSZ_/2; - p->hi = hi; - p->lo = lo; - - for (i = 0; i < _BR_RUNUP_; i++) - brand(p); -} - -/* init / end subroutines */ - -/* prints information, initializes PRNG, returns number of iterations */ - -#define INIT_ST "INIT>" -#define END_ST "END>" -#define MAX_HOST 80L - -static int64 bench_init (int argc, char *argv[], brand_t *br, - timer *t, char *more_args) -{ - uint64 seed; - int64 niters; - int i; - time_t c; - static char host[MAX_HOST]; - - if ((i = sizeof(void *)) != 8) { - printf ("error: sizeof(void *) = %d\n", i); - exit(1); - } - if ((i = sizeof(long)) != 8) { - printf ("error: sizeof(long) = %d\n", i); - exit(1); - } - if ((i = sizeof(int)) != 4) { - printf ("error: sizeof(int) = %d\n", i); - exit(1); - } - - if (argc < 3) { - /* prog seed iters [... other args] */ - printf ("Usage:\t%s seed iters %s\n", - argv[0], (more_args != NULL) ? 
more_args : ""); - exit(0); - } - - printf ("\n===================================================\n\n"); - - /* print start time of day */ - time (&c); - printf ("%s %s started at: %s", INIT_ST, argv[0], ctime(&c)); - t->init_time = c; - - gethostname (host, MAX_HOST); - printf ("%s host machine is %s\n", INIT_ST, host); - - printf ("%s program built on %s @ %s\n", - INIT_ST, __DATE__, __TIME__); - - seed = atol (argv[1]); - niters = atol (argv[2]); - - printf ("%s seed is %ld niters is %ld\n", INIT_ST, seed, niters); - if (argc > 3) { - printf ("%s other args: ", INIT_ST); - argv += 3; - while (*argv) - printf (" %s", *argv++); - printf ("\n"); - } - - if (br != NULL) - brand_init (br, seed); - - if (t != NULL) - timer_clear (t); - - printf ("\n"); - - return niters; -} - -static void bench_end (timer *t, int64 iters, char *work) -{ - time_t c; - double wall, cpus, rate; - - printf ("\n"); - - /* print end time of day */ - time (&c); - printf ("%s ended at: %s", END_ST, ctime(&c)); - c = c - t->init_time; - printf ("%s elapsed time is %d seconds\n", END_ST, c); - - if (t != NULL) { - timer_report(t, &wall, &cpus, 0); - - printf ("%s %7.3f secs of wall time ", - END_ST, wall); - if (c <= 0) c = 1; - printf ("%7.3f%% of value reported by time()\n", wall/c*100.); - - if (wall <= 0) wall = 0.0001; - printf ("%s %7.3f secs of cpu+sys time utilization = %5.3f%%\n", - END_ST, cpus, cpus/wall*100.); - - if (cpus > (wall+.01)) - printf ("this result is suspicious since cpu+system > wall\n"); - if ((iters > 0) && (work != NULL)) { - const char *units[4] = {"", "K", "M", "G"}; - int i = 0; - - rate = iters/wall; - while (i < 3) { - if (rate > 999.999) { - rate /= 1024.; - i++; - } - else - break; - } - - printf ("%s %8.4f %s %s per second\n", - END_ST, rate, units[i], work); - } - } -} diff --git a/_deprecated_sources/C/circ.c b/_deprecated_sources/C/circ.c deleted file mode 100644 index 14781051f..000000000 --- a/_deprecated_sources/C/circ.c +++ /dev/null @@ -1,12 +0,0 @@ 
-/* circular shift bbb into aaa */ -#include - -int aaa, bbb; - -int main (int argc, char * argv[]) -{ - start_pes(0); - shmem_int_get(&aaa, &bbb, 1,(_my_pe() + 1)% _num_pes()); - shmem_barrier_all(); - return 0; -} diff --git a/_deprecated_sources/C/cshift.c b/_deprecated_sources/C/cshift.c deleted file mode 100644 index ec4a7d4d2..000000000 --- a/_deprecated_sources/C/cshift.c +++ /dev/null @@ -1,11 +0,0 @@ -/* circular shift bbb into aaa */ -#include -int aaa, bbb; -int main (int argc, char * argv[]) -{ - start_pes(0); - shmem_int_get(&aaa, &bbb, 1,(_my_pe() + 1)% _num_pes()); - shmem_barrier_all(); - return 0; -} - diff --git a/_deprecated_sources/C/helloworld.c b/_deprecated_sources/C/helloworld.c deleted file mode 100644 index 3bd870d6a..000000000 --- a/_deprecated_sources/C/helloworld.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -int main(int argc, char* argv[]) -{ - int me, my_num_pes; - /* - ** Starts/Initializes SHMEM/OpenSHMEM - */ - start_pes(0); - /* - ** Fetch the number or processes - ** Some implementations use num_pes(); - */ - my_num_pes = _num_pes(); - /* - ** Assign my process ID to me - */ - me = _my_pe(); - printf("Hello World from %d of %d\n", me, my_num_pes); - return 0; -} diff --git a/_deprecated_sources/C/iput.c b/_deprecated_sources/C/iput.c deleted file mode 100644 index f36052d86..000000000 --- a/_deprecated_sources/C/iput.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * This program is an adaptation of examples found in the man pages - * of SGI's SHMEM implementation. - * - * In this program, iput is used to select 5 elements from array source separated by - * a stride of 2 and write them to array target using a stride of 1. 
- * - * Given the array source = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 } - * iput will select 5 elements from array source on PE 0, using a stride of 2: - * - * selected elements = { 1, 3, 5, 7, 9 } - * - * These elements will then be written to the array source on PE 1 using a stride of 1: - * - * target = { 1, 3, 5, 7, 9 } - * - */ - -#include -#include - -int -main(void) -{ - short source[10] = { 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10 }; - static short target[10]; - int me; - - start_pes(0); - me = _my_pe(); - - if (me == 0) { - /* put 10 words into target on PE 1 */ - shmem_short_iput(target, source, 1, 2, 5, 1); - } - - shmem_barrier_all(); /* sync sender and receiver */ - - if (me == 1) { - printf("target on PE %d is %hd %hd %hd %hd %hd\n", me, - target[0], target[1], target[2], - target[3], target[4] ); - } - shmem_barrier_all(); /* sync before exiting */ - - return 0; -} diff --git a/_deprecated_sources/C/just_shmalloc.c b/_deprecated_sources/C/just_shmalloc.c deleted file mode 100644 index 9b17c442e..000000000 --- a/_deprecated_sources/C/just_shmalloc.c +++ /dev/null @@ -1,21 +0,0 @@ -/* - * just do a shmalloc and a free, no output to be expected - */ - -#include - -int -main() -{ - long *x; - - start_pes(0); - - x = (long *) shmalloc(sizeof(*x)); - - shfree(x); - shmem_barrier_all(); - - return 0; -} - diff --git a/_deprecated_sources/C/mpp_bench.h b/_deprecated_sources/C/mpp_bench.h deleted file mode 100755 index 08cb2576f..000000000 --- a/_deprecated_sources/C/mpp_bench.h +++ /dev/null @@ -1,115 +0,0 @@ -#include -#include -#include -#include -#include - -// LP64 types - -typedef signed char int8 ; -typedef unsigned char uint8 ; -typedef short int16; -typedef unsigned short uint16; -typedef int int32; -typedef unsigned int uint32; -typedef long int64; -typedef unsigned long uint64; - -// timing - -#include - -typedef struct { - double accum_wall, accum_cpus; - double start_wall, start_cpus; - time_t init_time; - char running; -} timer; - -void timer_clear (timer 
*t); -void timer_start (timer *t); -void timer_stop (timer *t); -void timer_report (timer *t, double *pwall, double *pcpus, int64 print); - -// some masking macros - -#define _ZERO64 0uL -#define _maskl(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) << (64-(x)))) -#define _maskr(x) (((x) == 0) ? _ZERO64 : ((~_ZERO64) >> (64-(x)))) -#define _mask(x) (((x) < 64) ? _maskl(x) : _maskr(2*64 - (x))) - -// PRNG - -#define _BR_RUNUP_ 128L -#define _BR_LG_TABSZ_ 7L -#define _BR_TABSZ_ (1L<<_BR_LG_TABSZ_) - -typedef struct { - uint64 hi, lo, ind; - uint64 tab[_BR_TABSZ_]; -} brand_t; - -#define _BR_64STEP_(H,L,A,B) {\ - uint64 x;\ - x = H ^ (H << A) ^ (L >> (64-A));\ - H = L | (x >> (B-64));\ - L = x << (128 - B);\ -} - -uint64 brand (brand_t *p); -void brand_init (brand_t *p, uint64 val); - -// upc/shmem functionality - -// max size of tab (in wrds) -#ifndef LG_NWRDS -//#define LG_NWRDS 27L -// HACK: can't upc_all_alloc more than 2^24 words /PE -#define LG_NWRDS 24L -#endif -#define NWRDS (1uL << LG_NWRDS) - -#if defined(__UPC__) -#include -#include -#define MY_GTHREAD MYTHREAD -#define GTHREADS THREADS -#define TYPE shared uint64 - -#define mpp_init() // no-op -#define mpp_barrier_all() upc_barrier -#define mpp_exit(status) upc_global_exit(status) - -#else - -#if defined(CRAY_XT) -#include -#else -#include -#endif - -#define MY_GTHREAD shmem_my_pe() -#define GTHREADS shmem_n_pes() -#define TYPE uint64 - -#define mpp_init shmem_init -#define mpp_barrier_all shmem_barrier_all - -#if defined(CRAY_XT) -#define mpp_exit(status) shmem_finalize() -#else -#define mpp_exit(status) globalexit(status) -#endif - -#endif - -TYPE *mpp_alloc (int64 nbytes); -void mpp_free (TYPE *ptr); -int64 mpp_accum_long (int64 val); -void do_sync_init (void); -void mpp_put (TYPE *dst, uint64 *src, int64 nelem, int64 pe); -void mpp_broadcast (TYPE *dst, TYPE *src, int64 nelem, int64 root); -int64 all2all (TYPE *dst, uint64 *src, int64 len, int64 nwrd); -void do_all2all (TYPE *tab, uint64 *loc, brand_t *br, 
int64 msize, - int64 tsize, int64 rep, int64 print); -void do_warmup (brand_t *br); diff --git a/_deprecated_sources/C/pi.c b/_deprecated_sources/C/pi.c deleted file mode 100644 index a1d7fe61e..000000000 --- a/_deprecated_sources/C/pi.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * This file is distributed as part of GatorSHMEM, a project of the HCS - * Research Lab / CHREC at the University of Florida. - * - * Copyright (c) 2005-2010, the University of Florida. - * All rights reserved. - * - * Modified by SPoole from ORNL to be OpenSHMEM V1.0 compliant - * and work with other architectures. - * - */ -#include -#include -#include -#include -#include -#include - -#define M_PI_2 1.57079632679489661923 -#define TRIES 1000000000 - - -double timerval() -{ - struct timeval st; - gettimeofday ( &st, NULL); - return st.tv_sec + st.tv_usec * 1e-6; -} - - -int main(int argc, char *argv[]) -{ -/* 1. get random [0, 1] ==> - 2. get random [0, pi/2] ==>theta - 3. hit X < sin(theta) - 4. 2/pi = hit/tries. -*/ - double X, Theta, My_pi; - double Tstart, Tend; - int i, total, hit=0; - int *buf, my_mem; - int rank, numprocs, num_of_procs; - - num_of_procs = atoi(argv[1]); - - start_pes (0); - - numprocs = _num_pes(); - rank = _my_pe(); - - my_mem = (sizeof(int) * numprocs); - buf = shmalloc(my_mem); - - srand( (unsigned int) time(NULL)); - - if ( rank == 0 ) { - printf("pi is %f\n", M_PI_2 ); - printf("sin(pi/2) is %f\n", sin(M_PI_2)); - fflush(stdout); - } - - Tstart = timerval(); - - if ( rank != 0 ) { - total = TRIES/(numprocs-1); - if (rank == 1) - total += TRIES % (numprocs-1); - - srand( (unsigned int) time(NULL)); - - for ( i = 0; i < total ; i++){ - X = rand(); - X = X/RAND_MAX; - - Theta = rand(); - Theta = ( M_PI_2 ) * (Theta/RAND_MAX); - if ( X < sin(Theta)) - hit++; - } - buf[0] = hit; - } - - shmem_barrier_all(); - - if ( rank == 0 ) - for ( i = 1; i < numprocs; i++) { - shmem_getmem(buf, buf, sizeof(int), i); - hit += buf[0]; - printf("from node(%d), getmem buf\t %d, so hit 
is\t %d\n", i, buf[0], hit); - } - - shmem_barrier_all(); - - if ( rank == 0 ) { - My_pi = 2 * ( 1 / ( ((double)hit)/TRIES ) ); - Tend = timerval(); - printf("Hit is : %d :: Total is %d \n",hit, TRIES); - printf("My pi is %.16f \n", My_pi); - printf("Elapsed time is %f \n", Tend - Tstart); - } - - shmem_barrier_all(); - - return 0; -} diff --git a/_deprecated_sources/C/ping.c b/_deprecated_sources/C/ping.c deleted file mode 100644 index 803080d40..000000000 --- a/_deprecated_sources/C/ping.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * test if PE is accessible - * - */ - -#include - -#include - -int -main(void) -{ - int me, npes; - - setbuf(stdout, NULL); - - start_pes(0); - me = _my_pe(); - npes = _num_pes(); - - if (me == 0) { - int i; - for (i = 1; i < npes; i += 1) { - printf("From %d: PE %d is ", me, i); - printf("%s", shmem_pe_accessible(i) ? "" : "NOT "); - printf("accessible\n"); - } - } - - return 0; -} diff --git a/_deprecated_sources/C/reduce-max.c b/_deprecated_sources/C/reduce-max.c deleted file mode 100644 index 353f7ecd3..000000000 --- a/_deprecated_sources/C/reduce-max.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * reduce [0,1,2] + _my_pe() across 4 PEs with MAX() - * - * - */ - -#include -#include - -#include - -long pSync[_SHMEM_BCAST_SYNC_SIZE]; - -#define N 3 - -long src[N]; -long dst[N]; -long pWrk[_SHMEM_REDUCE_SYNC_SIZE]; - -int -main(void) -{ - int i; - - for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { - pSync[i] = _SHMEM_SYNC_VALUE; - } - - start_pes(0); - - for (i = 0; i < N; i += 1) { - src[i] = _my_pe() + i; - } - shmem_barrier_all(); - - shmem_long_max_to_all(dst, src, N, 0, 0, _num_pes(), pWrk, pSync); - - printf("%d/%d dst =", _my_pe(), _num_pes() ); - for (i = 0; i < N; i+= 1) { - printf(" %ld", dst[i]); - } - printf("\n"); - - return 0; -} diff --git a/_deprecated_sources/C/shmalloc.c b/_deprecated_sources/C/shmalloc.c deleted file mode 100644 index a10b30141..000000000 --- a/_deprecated_sources/C/shmalloc.c +++ /dev/null @@ -1,18 +0,0 @@ -/* 
-* OpenSHMEM program to allocate (shmalloc) symmetric memory (1 long integer), -* and then free it. Success of allocation is untested. -* -* This program produces no output. -*/ - -#include - -int -main(void) -{ - long *x; - start_pes(0); - x = (long *) shmalloc(sizeof(*x)); - shfree(x); - return 0; -} diff --git a/_deprecated_sources/C/shmem_long_finc_only.c b/_deprecated_sources/C/shmem_long_finc_only.c deleted file mode 100644 index 1798df225..000000000 --- a/_deprecated_sources/C/shmem_long_finc_only.c +++ /dev/null @@ -1,104 +0,0 @@ -/********************************************************************** -!---------------------------------------------------------------------- -! Copyright (c) 2010, Cray Inc. -! All rights reserved. -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions are -! met: -! -! * Redistributions of source code must retain the above copyright -! notice, this list of conditions and the following disclaimer. -! -! * Redistributions in binary form must reproduce the above copyright -! notice, this list of conditions and the following disclaimer in the -! documentation and/or other materials provided with the distribution. -! -! * Neither the name Cray Inc. nor the names of its contributors may be -! used to endorse or promote products derived from this software -! without specific prior written permission. - -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -! "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -! LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -! A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -! OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -! SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -! LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -! 
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -! THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -! (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -!---------------------------------------------------------------------- -! -! Purpose: Functional tests for the following shmem_finc routines --- -! shmem_long_finc -! -!*********************************************************************/ -#include -#include -#include - -#define ITER 50 - -long count; - -int main(int argc, char **argv) -{ - int i,j; - long modj,oldj,oldxmodj; - int my_pe,n_pes; - size_t max_elements_bytes; - static long *x; - - start_pes(0); - my_pe = shmem_my_pe(); - n_pes = shmem_n_pes(); -#ifdef HAVE_SET_CACHE_INV - shmem_set_cache_inv(); -#endif - -/* fail if trying to use only one processor */ - if ( n_pes <= 1 ){ - fprintf(stderr, "FAIL - test requires at least two PEs\n"); - exit(1); - } - - if(my_pe == 0) - fprintf(stderr, "shmem_long_finc(%s) n_pes=%d\n", argv[0],n_pes); - -/* shmalloc x on all pes (only use the one on PE 0) */ - - max_elements_bytes = (size_t) (sizeof(long) * n_pes); - x = (long *)shmalloc( max_elements_bytes ); - for(i=0; i -#include - -int main (int argc, char *argv[]) - -{ - brand_t br; - int64 seed, arg, msize, tsize, rep; - TYPE *tab; - uint64 *loc; - - mpp_init(); - - if (argc < 5) { - if (MY_GTHREAD == 0) - fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt " - "[ms2 ts2 rc2 ..]\n", argv[0]); - goto DONE; - } - - // alloc some shared space - // (checks for valid pointer and casts) - tab = mpp_alloc (NWRDS * sizeof(uint64)); - - // pointer to local space -#if defined(__UPC__) - loc = (uint64 *)&tab[MY_GTHREAD]; - -#else - loc = &tab[0]; - -#endif - - // init all local memory - bzero ((void *)&loc[0], NWRDS * sizeof(uint64)); - - seed = atol (argv[1]); - if (MY_GTHREAD == 0) - printf ("base seed is %ld\n", seed); - 
seed += (uint64)MY_GTHREAD << 32; - brand_init (&br, seed); // seed uniquely per PE - - arg = 2; - while (arg < argc) { - msize = atol (argv[arg++]); - if (arg >= argc) - break; - tsize = atol (argv[arg++]) * (1L << 20); - if (arg >= argc) - break; - rep = atol (argv[arg++]); - - if (MY_GTHREAD == 0) - printf ("tsize = %ldMB msize = %5ldB\n", tsize/(1L<<20), msize); - if (msize < sizeof(long)) { - if (MY_GTHREAD == 0) - printf ("msize must be > %ld B\n", (int64)sizeof(long)); - goto DONE; - } - if (tsize > (NWRDS * sizeof(long))) { - if (MY_GTHREAD == 0) - printf ("tsize must be < %ld MiB\n", - (int64)(NWRDS * sizeof(long)) / (1uL<<20)); - goto DONE; - } - - // exits on error - do_all2all (tab, loc, &br, msize, tsize, rep, 1); - - if (MY_GTHREAD == 0) - printf ("\n"); - } - - // free up the shared memory - mpp_free (tab); - - DONE: - mpp_barrier_all(); - return 0; -} diff --git a/_deprecated_sources/Diagrams_for_Spec1.1.pptx b/_deprecated_sources/Diagrams_for_Spec1.1.pptx deleted file mode 100644 index aa93a4f43..000000000 Binary files a/_deprecated_sources/Diagrams_for_Spec1.1.pptx and /dev/null differ diff --git a/_deprecated_sources/EXAMPLES/Makefile b/_deprecated_sources/EXAMPLES/Makefile deleted file mode 100644 index d581b0f8b..000000000 --- a/_deprecated_sources/EXAMPLES/Makefile +++ /dev/null @@ -1,84 +0,0 @@ -SHELL = /bin/sh - -CC = oshcc -CFLAGS = -Wall -O3 - -RUNCMD=oshrun -NPROCOPT=-np - -C_TESTS = $(wildcard *.c) -C_EXES = $(C_TESTS:.c=) -EXES = $(C_EXES) - -all default: $(EXES) - -run-hello: hello-openshmem - $(RUNCMD) $(NPROCOPT) 2 ./hello-openshmem - -run-cswap: shmem_cswap_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_cswap_example - -run-fadd: shmem_fadd_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_fadd_example - -run-finc: shmem_finc_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_finc_example - -run-g: shmem_g_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_g_example - -run-inc: shmem_inc_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_inc_example - -run-iput: 
shmem_iput_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_iput_example - -run-lock: shmem_lock_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_lock_example - -run-mype: shmem_mype_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_mype_example - -run-npes: shmem_npes_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_npes_example - -run-p: shmem_p_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_p_example - -run-ptr: shmem_ptr_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_ptr_example - -run-put: shmem_put_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_put_example - -run-shmalloc: shmem_shmalloc_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_shmalloc_example - -run-swap: shmem_swap_example - $(RUNCMD) $(NPROCOPT) 2 ./shmem_swap_example - -run-writing: writing_shmem_example - $(RUNCMD) $(NPROCOPT) 2 ./writing_shmem_example - -run-barrierall: shmem_barrierall_example - $(RUNCMD) $(NPROCOPT) 4 ./shmem_barrierall_example - -run-barrier: shmem_barrier_example - $(RUNCMD) $(NPROCOPT) 4 ./shmem_barrier_example - -run-broadcast: shmem_broadcast_example - $(RUNCMD) $(NPROCOPT) 4 ./shmem_broadcast_example - -run-collect: shmem_collect_example - $(RUNCMD) $(NPROCOPT) 4 ./shmem_collect_example - -run-fence: shmem_fence_example - $(RUNCMD) $(NPROCOPT) 4 ./shmem_fence_example - -run-quiet: shmem_quiet_example - $(RUNCMD) $(NPROCOPT) 4 ./shmem_quiet_example - -run: all run-hello run-barrierall run-barrier run-broadcast run-collect run-cswap run-fadd run-fence run-finc run-g run-inc run-iput run-lock run-mype run-npes run-p run-ptr run-put run-quiet run-shmalloc run-swap run-writing - -clean: - rm -f $(EXES) diff --git a/_deprecated_sources/EXAMPLES/hello-openshmem-c.output b/_deprecated_sources/EXAMPLES/hello-openshmem-c.output deleted file mode 100644 index 5509dc3b2..000000000 --- a/_deprecated_sources/EXAMPLES/hello-openshmem-c.output +++ /dev/null @@ -1,4 +0,0 @@ -Hello from 0 of 4 -Hello from 2 of 4 -Hello from 3 of 4 -Hello from 1 of 4 diff --git a/_deprecated_sources/EXAMPLES/hello-openshmem-f90.output 
b/_deprecated_sources/EXAMPLES/hello-openshmem-f90.output deleted file mode 100644 index e2f8b4075..000000000 --- a/_deprecated_sources/EXAMPLES/hello-openshmem-f90.output +++ /dev/null @@ -1,4 +0,0 @@ -Hello from 0 of 4 -Hello from 2 of 4 -Hello from 3 of 4 -Hello from 1 of 4 diff --git a/_deprecated_sources/EXAMPLES/hello-openshmem.c b/_deprecated_sources/EXAMPLES/hello-openshmem.c deleted file mode 100644 index 8102cee6d..000000000 --- a/_deprecated_sources/EXAMPLES/hello-openshmem.c +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include /* The shmem header file */ - -int -main (int argc, char *argv[]) -{ - int nprocs, me; - - shmem_init (); - nprocs = shmem_n_pes (); - me = shmem_my_pe (); - printf ("Hello from %d of %d\n", me, nprocs); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/hello-openshmem.f90 b/_deprecated_sources/EXAMPLES/hello-openshmem.f90 deleted file mode 100644 index c3d16efd4..000000000 --- a/_deprecated_sources/EXAMPLES/hello-openshmem.f90 +++ /dev/null @@ -1,16 +0,0 @@ -program hello - - include 'shmem.fh' - integer :: shmem_my_pe, shmem_n_pes - - integer :: npes, me - - call shmem_init () - npes = shmem_n_pes () - me = shmem_my_pe () - - write (*, 1000) me, npes - - 1000 format ('Hello from', 1X, I4, 1X, 'of', 1X, I4) - -end program hello diff --git a/_deprecated_sources/EXAMPLES/shmem_add_example.c b/_deprecated_sources/EXAMPLES/shmem_add_example.c deleted file mode 100644 index d7c02c2c1..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_add_example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -int main(void) -{ - int me, old; - static int dst; - - shmem_init(); - me = shmem_my_pe(); - - old = -1; - dst = 22; - shmem_barrier_all(); - - if (me == 1){ - old = shmem_int_fadd(&dst, 44, 0); - } - shmem_barrier_all(); - printf("%d: old = %d, dst = %d\n", me, old, dst); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_and_example.f90 b/_deprecated_sources/EXAMPLES/shmem_and_example.f90 deleted file mode 100644 index 
6f9c339b3..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_and_example.f90 +++ /dev/null @@ -1,22 +0,0 @@ -INCLUDE "shmem.fh" - -INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) -DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ -PARAMETER (NR=1) -INTEGER*4 PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) -INTEGER FOO, FOOAND -SAVE FOO, FOOAND, PWRK -INTRINSIC SHMEM_MY_PE() - -FOO = SHMEM_MY_PE() -IF ( MOD(SHMEM_MY_PE() .EQ. 0) THEN - IF ( MOD(SHMEM_N_PES()(),2) .EQ. 0) THEN - CALL SHMEM_INT8_AND_TO_ALL(FOOAND, FOO, NR, 0, 1, NPES/2, & - PWRK, PSYNC) - ELSE - CALL SHMEM_INT8_AND_TO_ALL(FOOAND, FOO, NR, 0, 1, NPES/2+1, & - PWRK, PSYNC) - - ENDIF - PRINT*,'Result on PE ',SHMEM_MY_PE(),' is ',FOOAND -ENDIF diff --git a/_deprecated_sources/EXAMPLES/shmem_barrier_example.c b/_deprecated_sources/EXAMPLES/shmem_barrier_example.c deleted file mode 100644 index 5b205d73a..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_barrier_example.c +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - -long pSync[_SHMEM_BARRIER_SYNC_SIZE]; -int x = 10101; - -int main(void) -{ - int i, me, npes; - - for (i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; i += 1){ - pSync[i] = _SHMEM_SYNC_VALUE; - } - - shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); - - if(me % 2 == 0){ - x = 1000 + me; - /*put to next even PE in a circular fashion*/ - shmem_int_p(&x, 4, (me+2)%npes); - /*synchronize all even pes*/ - shmem_barrier(0, 1, (npes/2 + npes%2), pSync); - } - printf("%d: x = %d\n", me, x); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_barrierall_example.c b/_deprecated_sources/EXAMPLES/shmem_barrierall_example.c deleted file mode 100644 index 5e4f430a9..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_barrierall_example.c +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include - -int x=1010; - -int main(void) -{ - int me, npes; - - shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); - - /*put to next PE in a circular fashion*/ - shmem_int_p(&x, 4, (me+1)%npes); - /*synchronize 
all PEs*/ - shmem_barrier_all(); - - printf("%d: x = %d\n", me, x); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_broadcast_example.c b/_deprecated_sources/EXAMPLES/shmem_broadcast_example.c deleted file mode 100644 index c32212bd2..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_broadcast_example.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include - -#define NUM_ELEMS 4 -long pSync[_SHMEM_BCAST_SYNC_SIZE]; -long source[NUM_ELEMS], dest[NUM_ELEMS]; - -int main(void) -{ - int i, me, npes; - - shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); - - if (me == 0) - for (i = 0; i < NUM_ELEMS; i++) - source[i] = i; - for (i=0; i < _SHMEM_BCAST_SYNC_SIZE; i++) { - pSync[i] = _SHMEM_SYNC_VALUE; - } - shmem_barrier_all(); /* Wait for all PEs to initialize pSync */ - - shmem_broadcast64(dest, source, NUM_ELEMS, 4, 5, 0, 3, pSync); - printf("%d: %ld", me, dest[0]); - for (i = 1; i < NUM_ELEMS; i++) - printf(", %ld", dest[i]); - printf("\n"); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_broadcast_example.f90 b/_deprecated_sources/EXAMPLES/shmem_broadcast_example.f90 deleted file mode 100644 index 1f914bc25..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_broadcast_example.f90 +++ /dev/null @@ -1,10 +0,0 @@ -INTEGER PSYNC(SHMEM_BCAST_SYNC_SIZE) -INTEGER DEST, SOURCE, NLONG, PE_ROOT, PE_START, -& LOGPE_STRIDE, PE_SIZE, PSYNC -COMMON /COM/ DEST, SOURCE - -DATA PSYNC /SHMEM_BCAST_SYNC_SIZE*SHMEM_SYNC_VALUE/ - -CALL SHMEM_BROADCAST64(DEST, SOURCE, NLONG, 0, 4, 0, 4, PSYNC) - - diff --git a/_deprecated_sources/EXAMPLES/shmem_collect_example.c b/_deprecated_sources/EXAMPLES/shmem_collect_example.c deleted file mode 100644 index 0461f2e03..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_collect_example.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include - -long pSync[_SHMEM_COLLECT_SYNC_SIZE]; -int source[2]; - -int main(void) -{ - int i, me, npes; - int *dest; - - shmem_init(); - me = shmem_my_pe(); - npes = 
shmem_n_pes(); - - source[0] = me * 2; - source[1] = me * 2 + 1; - dest = (int *)shmem_malloc(sizeof(int) * npes * 2); - for (i=0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) { - pSync[i] = _SHMEM_SYNC_VALUE; - } - shmem_barrier_all(); /* Wait for all PEs to initialize pSync */ - - shmem_collect32(dest, source, 2, 0, 0, npes, pSync); - printf("%d: %d", me, dest[0]); - for (i = 1; i < npes * 2; i++) - printf(", %d", dest[i]); - printf("\n"); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_collect_example.f90 b/_deprecated_sources/EXAMPLES/shmem_collect_example.f90 deleted file mode 100644 index 4fab7afde..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_collect_example.f90 +++ /dev/null @@ -1,5 +0,0 @@ -INTEGER PSYNC(SHMEM_COLLECT_SYNC_SIZE) -DATA PSYNC /SHMEM_COLLECT_SYNC_SIZE*SHMEM_SYNC_VALUE/ - -CALL SHMEM_COLLECT4(DEST, SOURCE, 64, PE_START, LOGPE_STRIDE, -& PE_SIZE, PSYNC) diff --git a/_deprecated_sources/EXAMPLES/shmem_cswap_example.c b/_deprecated_sources/EXAMPLES/shmem_cswap_example.c deleted file mode 100644 index f49e79d2d..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_cswap_example.c +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include - -int main(void) -{ - static int race_winner = -1; - int oldval; - shmem_init(); - oldval = shmem_int_cswap(&race_winner, -1, shmem_my_pe(), 0); - if(oldval == -1) printf("pe %d was first\n",shmem_my_pe()); - return 1; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_fadd_example.c b/_deprecated_sources/EXAMPLES/shmem_fadd_example.c deleted file mode 100644 index d7c02c2c1..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_fadd_example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -int main(void) -{ - int me, old; - static int dst; - - shmem_init(); - me = shmem_my_pe(); - - old = -1; - dst = 22; - shmem_barrier_all(); - - if (me == 1){ - old = shmem_int_fadd(&dst, 44, 0); - } - shmem_barrier_all(); - printf("%d: old = %d, dst = %d\n", me, old, dst); - return 0; -} diff --git 
a/_deprecated_sources/EXAMPLES/shmem_fence_example.c b/_deprecated_sources/EXAMPLES/shmem_fence_example.c deleted file mode 100644 index b5ddbaa88..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_fence_example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -long dest[10] = {0}; -int targ = 0; - -int main(void) -{ - long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - int src = 99; - shmem_init(); - if (shmem_my_pe() == 0) { - shmem_long_put(dest, source, 10, 1); /*put1*/ - shmem_long_put(dest, source, 10, 2); /*put2*/ - shmem_fence(); - shmem_int_put(&targ, &src, 1, 1); /*put3*/ - shmem_int_put(&targ, &src, 1, 2); /*put4*/ - } - shmem_barrier_all(); /* sync sender and receiver */ - printf("dest[0] on PE %d is %ld\n", shmem_my_pe(), dest[0]); - return 1; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_finalize_example.c b/_deprecated_sources/EXAMPLES/shmem_finalize_example.c deleted file mode 100644 index 0412810de..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_finalize_example.c +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include - -long x = 10101; - -int main(void) { - int me, npes; - long y = -1; - - shmem_init(); - - me = shmem_my_pe(); - npes = shmem_n_pes(); - if (me == 0) - y = shmem_long_g(&x, npes-1); - - printf("%d: y = %ld\n", me, y); - - shmem_finalize(); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_g_example.c b/_deprecated_sources/EXAMPLES/shmem_g_example.c deleted file mode 100644 index 0fb01446a..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_g_example.c +++ /dev/null @@ -1,21 +0,0 @@ -#include -#include - -long x = 10101; - -int main(void) -{ - int me, npes; - long y = -1; - - shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); - - if (me == 0) - y = shmem_long_g(&x, npes-1); - - printf("%d: y = %ld\n", me, y); - - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_get_example.f90 b/_deprecated_sources/EXAMPLES/shmem_get_example.f90 deleted file mode 100644 index 08b165d74..000000000 --- 
a/_deprecated_sources/EXAMPLES/shmem_get_example.f90 +++ /dev/null @@ -1,15 +0,0 @@ -PROGRAM REDUCTION -REAL VALUES, SUM -COMMON /C/ VALUES -REAL WORK -CALL SHMEM_INIT() ! ALLOW ANY NUMBER OF PES -VALUES = SHMEM_MY_PE() ! INITIALIZE IT TO SOMETHING -CALL SHMEM_BARRIER_ALL -SUM = 0.0 -DO I = 0, SHMEM_N_PES()-1 - CALL SHMEM_REAL_GET(WORK, VALUES, (SHMEM_N_PES()()-1), I) - SUM = SUM + WORK -ENDDO -PRINT*,'PE ',SHMEM_MY_PE(),' COMPUTED SUM=',SUM -CALL SHMEM_BARRIER_ALL -END diff --git a/_deprecated_sources/EXAMPLES/shmem_global_exit_example.c b/_deprecated_sources/EXAMPLES/shmem_global_exit_example.c deleted file mode 100644 index 71c5f9fef..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_global_exit_example.c +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include -#include - -int -main(void) -{ - int me, npes; - - shmem_init(); - - me = shmem_my_pe(); - npes = shmem_n_pes(); - - if (me == 0) { - FILE *fp = fopen("input.txt", "r"); - - if (fp == NULL) { /* Input file required by program is not available */ - shmem_global_exit(EXIT_FAILURE); - } - - /* do something with the file */ - - fclose(fp); - } - - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_iget_example.f90 b/_deprecated_sources/EXAMPLES/shmem_iget_example.f90 deleted file mode 100644 index 019e9ca0d..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_iget_example.f90 +++ /dev/null @@ -1,11 +0,0 @@ -PROGRAM STRIDELOGICAL -LOGICAL SOURCE(10), DEST(5) -SAVE SOURCE ! SAVE MAKES IT REMOTELY ACCESSIBLE -DATA SOURCE /.T.,.F.,.T.,.F.,.T.,.F.,.T.,.F.,.T.,.F./ -DATA DEST / 5*.F. / -CALL SHMEM_INIT() -IF (SHMEM_MY_PE() .EQ. 
0) THEN - CALL SHMEM_LOGICAL_IGET(DEST, SOURCE, 1, 2, 5, 1) - PRINT*,'DEST AFTER SHMEM_LOGICAL_IGET:',DEST -ENDIF -CALL SHMEM_BARRIER_ALL diff --git a/_deprecated_sources/EXAMPLES/shmem_init_example.f90 b/_deprecated_sources/EXAMPLES/shmem_init_example.f90 deleted file mode 100644 index 65b32f5db..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_init_example.f90 +++ /dev/null @@ -1,21 +0,0 @@ -PROGRAM PUT - -INTEGER TARG, SRC, RECEIVER, BAR -COMMON /T/ TARG -PARAMETER (RECEIVER=1) -CALL SHMEM_INIT() - -IF (SHMEM_MY_PE() .EQ. 0) THEN - SRC = 33 - CALL SHMEM_INTEGER_PUT(TARG, SRC, 1, RECEIVER) -ENDIF - -CALL SHMEM_BARRIER_ALL ! SYNCHRONIZES SENDER AND RECEIVER - -IF (SHMEM_MY_PE() .EQ. RECEIVER) THEN - PRINT*,'PE ', SHMEM_MY_PE(),' TARG=',TARG,' (expect 33)' -ENDIF - -CALL SHMEM_FINALIZE() - -END diff --git a/_deprecated_sources/EXAMPLES/shmem_iput_example.c b/_deprecated_sources/EXAMPLES/shmem_iput_example.c deleted file mode 100644 index d48f88176..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_iput_example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -int main(void) -{ - short source[10] = { 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10 }; - static short dest[10]; - shmem_init(); - if (shmem_my_pe() == 0) { - /* put 5 words into dest on PE 1 */ - shmem_short_iput(dest, source, 1, 2, 5, 1); - } - shmem_barrier_all(); /* sync sender and receiver */ - if (shmem_my_pe() == 1) { - printf("dest on PE %d is %d %d %d %d %d\n", shmem_my_pe(), - (int)dest[0], (int)dest[1], (int)dest[2], - (int)dest[3], (int)dest[4] ); - } - shmem_barrier_all(); /* sync before exiting */ - return 1; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_lock_example.c b/_deprecated_sources/EXAMPLES/shmem_lock_example.c deleted file mode 100644 index 838e43ecf..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_lock_example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include -long L = 0; - -int main(int argc, char **argv) -{ - int me, slp; - shmem_init(); - me = shmem_my_pe(); - slp 
= 1; - shmem_barrier_all(); - if (me == 1) - sleep (3); - shmem_set_lock(&L); - printf("%d: sleeping %d second%s...\n", me, slp, slp == 1 ? "" : "s"); - sleep(slp); - printf("%d: sleeping...done\n", me); - shmem_clear_lock(&L); - shmem_barrier_all(); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_max_example.f90 b/_deprecated_sources/EXAMPLES/shmem_max_example.f90 deleted file mode 100644 index 26918897c..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_max_example.f90 +++ /dev/null @@ -1,14 +0,0 @@ -INCLUDE "shmem.fh" -INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) -DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ -PARAMETER (NR=1) -REAL FOO, FOOMAX, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) -COMMON /COM/ FOO, FOOMAX, PWRK -INTRINSIC SHMEM_MY_PE() - -IF ( MOD(SHMEM_MY_PE() .EQ. 0) THEN - CALL SHMEM_REAL8_MAX_TO_ALL(FOOMAX, FOO, NR, 0, 1, N$PES/2, -& PWRK, PSYNC) - PRINT*,'Result on PE ',SHMEM_MY_PE(),' is ',FOOMAX -ENDIF - diff --git a/_deprecated_sources/EXAMPLES/shmem_min_example.f90 b/_deprecated_sources/EXAMPLES/shmem_min_example.f90 deleted file mode 100644 index f7f3ddcbe..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_min_example.f90 +++ /dev/null @@ -1,14 +0,0 @@ -INCLUDE "shmem.fh" - -INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) -DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ -PARAMETER (NR=1) -REAL FOO, FOOMIN, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) -COMMON /COM/ FOO, FOOMIN, PWRK -INTRINSIC SHMEM_MY_PE() - -IF ( MOD(SHMEM_MY_PE() .EQ. 
0) THEN - CALL SHMEM_REAL8_MIN_TO_ALL(FOOMIN, FOO, NR, 0, 1, N$PES/2, -& PWRK, PSYNC) - PRINT*,'Result on PE ',SHMEM_MY_PE(),' is ',FOOMIN -ENDIF diff --git a/_deprecated_sources/EXAMPLES/shmem_mype_example.c b/_deprecated_sources/EXAMPLES/shmem_mype_example.c deleted file mode 100644 index 762d56a43..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_mype_example.c +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -int main(void) -{ - int me; - - shmem_init(); - me = shmem_my_pe(); - printf("My PE id is: %d\n", me); - - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_npes_example.c b/_deprecated_sources/EXAMPLES/shmem_npes_example.c deleted file mode 100644 index 788abda7d..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_npes_example.c +++ /dev/null @@ -1,17 +0,0 @@ -#include -#include - -int main(void) -{ - int npes; - - shmem_init(); - - npes = shmem_n_pes(); - - if (shmem_my_pe() == 0) { - printf("Number of PEs executing this program is: %d\n", npes); - } - - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_or_example.f90 b/_deprecated_sources/EXAMPLES/shmem_or_example.f90 deleted file mode 100644 index 84b5dd1c9..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_or_example.f90 +++ /dev/null @@ -1,15 +0,0 @@ -INCLUDE "mpp/shmem.fh" - -INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) -DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ -PARAMETER (NR=1) -REAL PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) -INTEGER FOO, FOOOR -COMMON /COM/ FOO, FOOOR, PWRK -INTRINSIC SHMEM_MY_PE() - -IF ( MOD(SHMEM_MY_PE() .EQ. 
0) THEN - CALL SHMEM_INT8_OR_TO_ALL(FOOOR, FOO, NR, 0, 1, N$PES/2, -& PWRK, PSYNC) - PRINT*,'Result on PE ',SHMEM_MY_PE(),' is ',FOOOR -ENDIF diff --git a/_deprecated_sources/EXAMPLES/shmem_p_example.c b/_deprecated_sources/EXAMPLES/shmem_p_example.c deleted file mode 100644 index 740f9285d..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_p_example.c +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include -static const double e = 2.71828182; -static const double epsilon = 0.00000001; - -int main(void) -{ - double *f; - int me; - - shmem_init(); - me = shmem_my_pe(); - f = (double *) shmem_malloc(sizeof (*f)); - - *f = 3.1415927; - shmem_barrier_all(); - - if (me == 0) - shmem_double_p(f, e, 1); - - shmem_barrier_all(); - if (me == 1) - printf("%s\n", (fabs (*f - e) < epsilon) ? "OK" : "FAIL"); - - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_prod_example.f90 b/_deprecated_sources/EXAMPLES/shmem_prod_example.f90 deleted file mode 100644 index db1d1f1cb..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_prod_example.f90 +++ /dev/null @@ -1,14 +0,0 @@ -INCLUDE "shmem.fh" - -INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) -DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ -PARAMETER (NR=1) -REAL FOO, FOOPROD, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) -COMMON /COM/ FOO, FOOPROD, PWRK -INTRINSIC SHMEM_MY_PE() - -IF ( MOD(SHMEM_MY_PE() .EQ. 
0) THEN - CALL SHMEM_COMP8_PROD_TO_ALL(FOOPROD, FOO, NR, 0, 1, N$PES/2, -& PWRK, PSYNC) - PRINT*,'Result on PE ',SHMEM_MY_PE(),' is ',FOOPROD -ENDIF diff --git a/_deprecated_sources/EXAMPLES/shmem_ptr_example.c b/_deprecated_sources/EXAMPLES/shmem_ptr_example.c deleted file mode 100644 index 217b26b61..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_ptr_example.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include - -int main(void) -{ - static int bigd[100]; - int *ptr; - int i; - - shmem_init(); - - if (shmem_my_pe() == 0) { - /* initialize PE 1's bigd array */ - ptr = shmem_ptr(bigd, 1); - if (ptr == NULL) - printf("can't use pointer to directly access PE 1's array\n"); - else - for (i=0; i<100; i++) - *ptr++ = i+1; - } - - shmem_barrier_all(); - - if (shmem_my_pe() == 1) { - printf("bigd on PE 1 is:\n"); - for (i=0; i<100; i++) - printf(" %d\n",bigd[i]); - printf("\n"); - } - return 1; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_ptr_example.f90 b/_deprecated_sources/EXAMPLES/shmem_ptr_example.f90 deleted file mode 100644 index c7bba2866..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_ptr_example.f90 +++ /dev/null @@ -1,29 +0,0 @@ -PROGRAM REMOTEWRITE -INCLUDE 'shmem.fh' - -INTEGER BIGD(100) -SAVE BIGD - -INTEGER POINTEE(*) -POINTER (PTR,POINTEE) - -CALL SHMEM_INIT() - - -IF (SHMEM_MY_PE() .EQ. 0) THEN - ! initialize PE 1's BIGD array - PTR = SHMEM_PTR(BIGD, 1) ! get address of PE 1's BIGD - ! array - DO I=1,100 - POINTEE(I) = I - ENDDO -ENDIF - -CALL SHMEM_BARRIER_ALL - -IF (SHMEM_MY_PE() .EQ. 
1) THEN - PRINT*,'BIGD on PE 1 is: ' - PRINT*,BIGD -ENDIF -END - diff --git a/_deprecated_sources/EXAMPLES/shmem_put_example.c b/_deprecated_sources/EXAMPLES/shmem_put_example.c deleted file mode 100644 index 50b6d38c0..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_put_example.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include - -int main(void) -{ - long source[10] = { 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10 }; - static long dest[10]; - shmem_init(); - if (shmem_my_pe() == 0) { - /* put 10 words into dest on PE 1 */ - shmem_long_put(dest, source, 10, 1); - } - shmem_barrier_all(); /* sync sender and receiver */ - printf("dest[0] on PE %d is %ld\n", shmem_my_pe(), dest[0]); - return 1; -} - diff --git a/_deprecated_sources/EXAMPLES/shmem_quiet_example.c b/_deprecated_sources/EXAMPLES/shmem_quiet_example.c deleted file mode 100644 index fb258a675..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_quiet_example.c +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include - -long dest[3] = {0}; -int targ = 0; -long source[3] = {1, 2, 3}; -int src = 90; - -int main(void) -{ - long x[3] = {0}; - int y = 0; - - shmem_init(); - if (shmem_my_pe() == 0) { - shmem_long_put(dest, source, 3, 1); /*put1*/ - shmem_int_put(&targ, &src, 1, 2); /*put2*/ - - shmem_quiet(); - - shmem_long_get(x, dest, 3, 1); /*gets updated value from dest on PE 1 to local array x */ - shmem_int_get(&y, &targ, 1, 2); /*gets updated value from targ on PE 2 to local variable y*/ - printf("x: {%ld,%ld,%ld}\n",x[0],x[1],x[2]); /*x: {1,2,3}*/ - printf("y: %d\n", y); /*y: 90*/ - - shmem_int_put(&targ, &src, 1, 1); /*put3*/ - shmem_int_put(&targ, &src, 1, 2); /*put4*/ - } - shmem_barrier_all(); /* sync sender and receiver */ - return 0; -} - diff --git a/_deprecated_sources/EXAMPLES/shmem_quiet_example.f90 b/_deprecated_sources/EXAMPLES/shmem_quiet_example.f90 deleted file mode 100644 index b2057910b..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_quiet_example.f90 +++ /dev/null @@ -1,21 +0,0 @@ -PROGRAM COMPFLAG 
-INTEGER FLAG_VAR, ARRAY(100), RECEIVER, SENDER -COMMON/FLAG/FLAG_VAR -COMMON/DATA/ARRAY -INTRINSIC SHMEM_MY_PE() - -FLAG_VAR = 0 -CALL SHMEM_BARRIER_ALL ! wait for FLAG_VAR to be initialized - -SENDER = 0 ! PE 0 sends the data -RECEIVER = 1 ! PE 1 receives the data -IF (SHMEM_MY_PE() .EQ. 0) THEN - ARRAY = 33 - CALL SHMEM_PUT(ARRAY, ARRAY, 100, RECEIVER) ! start sending data - CALL SHMEM_QUIET ! wait for delivery - CALL SHMEM_PUT(FLAG_VAR, 1, 1, RECEIVER) ! send completion flag -ELSE IF (SHMEM_MY_PE() .EQ. RECEIVER) THEN - CALL SHMEM_WAIT(FLAG_VAR, 0) - PRINT*,ARRAY ! ARRAY has been delivered -ENDIF -END diff --git a/_deprecated_sources/EXAMPLES/shmem_shmalloc_example.c b/_deprecated_sources/EXAMPLES/shmem_shmalloc_example.c deleted file mode 100644 index 22dabf588..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_shmalloc_example.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -int main(void) -{ - int *target; - int source; - - shmem_init(); - - source = shmem_my_pe(); - target = shmem_malloc(sizeof(int)); /*symmetric data object on heap*/ - - shmem_int_put(target, &source, 1, (source + 1)% shmem_n_pes()); - shmem_barrier_all(); - printf("On PE%d value of target is %d\n",source,*target); - - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_startpes_example.f90 b/_deprecated_sources/EXAMPLES/shmem_startpes_example.f90 deleted file mode 100644 index 3a7629464..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_startpes_example.f90 +++ /dev/null @@ -1,18 +0,0 @@ -PROGRAM PUT - -INTEGER TARG, SRC, RECEIVER, BAR -COMMON /T/ TARG -PARAMETER (RECEIVER=1) -CALL START_PES(0) - -IF (SHMEM_MY_PE() .EQ. 0) THEN - SRC = 33 - CALL SHMEM_INTEGER_PUT(TARG, SRC, 1, RECEIVER) -ENDIF - -CALL SHMEM_BARRIER_ALL ! SYNCHRONIZES SENDER AND RECEIVER - -IF (SHMEM_MY_PE() .EQ. 
RECEIVER) THEN - PRINT*,'PE ', SHMEM_MY_PE(),' TARG=',TARG,' (expect 33)' -ENDIF -END diff --git a/_deprecated_sources/EXAMPLES/shmem_sum_example.f90 b/_deprecated_sources/EXAMPLES/shmem_sum_example.f90 deleted file mode 100644 index bf4b87582..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_sum_example.f90 +++ /dev/null @@ -1,14 +0,0 @@ -INCLUDE "shmem.fh" - -INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) -DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ -PARAMETER (NR=1) -REAL FOO, FOOSUM, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) -COMMON /COM/ FOO, FOOSUM, PWRK -INTRINSIC SHMEM_MY_PE() - -IF ( MOD(SHMEM_MY_PE() .EQ. 0) THEN - CALL SHMEM_INT4_SUM_TO_ALL(FOOSUM, FOO, NR, 0, 1, N$PES/2, -& PWRK, PSYNC) - PRINT*,'Result on PE ',SHMEM_MY_PE(),' is ',FOOSUM -ENDIF diff --git a/_deprecated_sources/EXAMPLES/shmem_swap_example.c b/_deprecated_sources/EXAMPLES/shmem_swap_example.c deleted file mode 100644 index f868015ea..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_swap_example.c +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -int main(void) -{ - long *dest; - int me, npes; - long swapped_val, new_val; - - shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); - dest = (long *) shmem_malloc(sizeof (*dest)); - *dest = me; - shmem_barrier_all(); - new_val = me; - if (me & 1){ - swapped_val = shmem_long_swap(dest, new_val, (me + 1) % npes); - printf("%d: dest = %ld, swapped = %ld\n", me, *dest, swapped_val); - } - shmem_free(dest); - return 0; -} diff --git a/_deprecated_sources/EXAMPLES/shmem_wait1_example.f90 b/_deprecated_sources/EXAMPLES/shmem_wait1_example.f90 deleted file mode 100644 index 3d93e56f5..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_wait1_example.f90 +++ /dev/null @@ -1,2 +0,0 @@ -INTEGER*8 IVAR -CALL SHMEM_INT8_WAIT(IVAR, INTEGER*8(100)) diff --git a/_deprecated_sources/EXAMPLES/shmem_wait2_example.f90 b/_deprecated_sources/EXAMPLES/shmem_wait2_example.f90 deleted file mode 100644 index 9870781ca..000000000 --- 
a/_deprecated_sources/EXAMPLES/shmem_wait2_example.f90 +++ /dev/null @@ -1,2 +0,0 @@ -INTEGER*8 IVAR -CALL SHMEM_INT8_WAIT_UNTIL(IVAR, SHMEM_CMP_NE, INTEGER*8(100)) diff --git a/_deprecated_sources/EXAMPLES/shmem_wait3_example.f90 b/_deprecated_sources/EXAMPLES/shmem_wait3_example.f90 deleted file mode 100644 index 16c3cd57b..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_wait3_example.f90 +++ /dev/null @@ -1,4 +0,0 @@ -#include #include - -int ivar; -shmem_int_wait_until(&ivar, SHMEM_CMP_LT, 0); diff --git a/_deprecated_sources/EXAMPLES/shmem_wait4_example.f90 b/_deprecated_sources/EXAMPLES/shmem_wait4_example.f90 deleted file mode 100644 index fc30fcf47..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_wait4_example.f90 +++ /dev/null @@ -1,12 +0,0 @@ -SUBROUTINE EXAMPLE() -INTEGER FLAG_VAR -COMMON/FLAG/FLAG_VAR -. . . -FLAG_VAR = FLAG_VALUE ! initialize the event variable -. . . -IF (FLAG_VAR .EQ. FLAG_VALUE) THEN - CALL SHMEM_WAIT(FLAG_VAR, FLAG_VALUE) -ENDIF -FLAG_VAR = FLAG_VALUE ! reset the event variable for next time -. . . -END diff --git a/_deprecated_sources/EXAMPLES/shmem_xor_example.f90 b/_deprecated_sources/EXAMPLES/shmem_xor_example.f90 deleted file mode 100644 index faaa8a05c..000000000 --- a/_deprecated_sources/EXAMPLES/shmem_xor_example.f90 +++ /dev/null @@ -1,14 +0,0 @@ -INCLUDE "mpp/shmem.fh" - -INTEGER PSYNC(SHMEM_REDUCE_SYNC_SIZE) -DATA PSYNC /SHMEM_REDUCE_SYNC_SIZE*SHMEM_SYNC_VALUE/ -PARAMETER (NR=1) -REAL FOO, FOOXOR, PWRK(MAX(NR/2+1,SHMEM_REDUCE_MIN_WRKDATA_SIZE)) -COMMON /COM/ FOO, FOOXOR, PWRK -INTRINSIC SHMEM_MY_PE() - -IF ( MOD(SHMEM_MY_PE() .EQ. 
0) THEN - CALL SHMEM_REAL8_XOR_TO_ALL(FOOXOR, FOO, NR, 0, 1, N$PES/2, -& PWRK, PSYNC) - PRINT*,'Result on PE ',SHMEM_MY_PE(),' is ',FOOXOR -ENDIF diff --git a/_deprecated_sources/EXAMPLES/writing_shmem_example.c b/_deprecated_sources/EXAMPLES/writing_shmem_example.c deleted file mode 100644 index 6ee05d3f0..000000000 --- a/_deprecated_sources/EXAMPLES/writing_shmem_example.c +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include -#define SIZE 16 -int -main(int argc, char* argv[]) -{ - short source[SIZE]; - static short dest[SIZE]; - int i, npes; - shmem_init(); - npes = shmem_n_pes(); - if (shmem_my_pe() == 0) { - /* initialize array */ - for(i = 0; i < SIZE; i++) - source[i] = i; - /* local, not symmetric */ - /* static makes it symmetric */ - /* put "size" words into dest on each PE */ - for(i = 1; i < npes; i++) - shmem_short_put(dest, source, SIZE, i); - } - shmem_barrier_all(); /* sync sender and receiver */ - if (shmem_my_pe() != 0) { - printf("dest on PE %d is \t", shmem_my_pe()); - for(i = 0; i < SIZE; i++) - printf("%hd \t", dest[i]); - printf("\n"); - } - shmem_finalize(); - return 0; -} - diff --git a/_deprecated_sources/EXAMPLES/writing_shmem_example.output b/_deprecated_sources/EXAMPLES/writing_shmem_example.output deleted file mode 100644 index 0207d3ad7..000000000 --- a/_deprecated_sources/EXAMPLES/writing_shmem_example.output +++ /dev/null @@ -1,3 +0,0 @@ -dest on PE 1 is 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -dest on PE 2 is 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -dest on PE 3 is 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 diff --git a/_deprecated_sources/Fortran/hello.f90 b/_deprecated_sources/Fortran/hello.f90 deleted file mode 100644 index 619081dc5..000000000 --- a/_deprecated_sources/Fortran/hello.f90 +++ /dev/null @@ -1,16 +0,0 @@ -program whoami - - include 'shmem.fh' - - integer npes, me - character*32 h - - call start_pes(0) - - npes = num_pes() - me = my_pe() - call hostnm(h) - - print *, h, 'I am ', me, ' of ', npes - -end program whoami diff --git 
a/_deprecated_sources/INTRODUCTION/atomics_intro.tex b/_deprecated_sources/INTRODUCTION/atomics_intro.tex deleted file mode 100644 index 2c8830311..000000000 --- a/_deprecated_sources/INTRODUCTION/atomics_intro.tex +++ /dev/null @@ -1,30 +0,0 @@ -\label{sec:amo} -%\openshmem{} specification defines various \acp{AMO}. -\ac{AMO} is a one-sided communication mechanism that combines memory update operations with atomicity guarantees described in Section \ref{subsec:amo_guarantees}. -Similar to the \ac{RMA} routines, described in Section \ref{sec:rma}, -the \acp{AMO} are performed only on symmetric objects. -\openshmem{} defines the two types of \ac{AMO} routines: -\begin{itemize} -\item % Blocking\\ -The \textit{fetch-and-operate} routines combine memory update and fetch -operations in a single atomic operation. -The routines return after the data has been fetched and delivered to the local \ac{PE}. -%Fetch-and-operate operations are blocking atomic operation and return as -%soon as the fetched is delivered to the initiator of the operation. - -The \textit{fetch-and-operate} operations include: \FUNC{SHMEM\_CSWAP}, \FUNC{SHMEM\_SWAP}, \FUNC{SHMEM\_FINC}, and\\ \FUNC{SHMEM\_FADD}. - -\item % Non-Blocking\\ -The \textit{non-fetch} atomic routines update the remote memory -in a single atomic operation. -A \textit{non-fetch} atomic routine -starts the atomic operation and may return before the operation execution -on the remote \ac{PE}. -To force completion for these \textit{non-fetch} atomic routines, \FUNC{shmem\_quiet}, -\FUNC{shmem\_barrier}, or \FUNC{shmem\_barrier\_all} can be used by an \openshmem{} program. - -The \textit{non-fetch} operations include: \FUNC{SHMEM\_INC} and \FUNC{SHMEM\_ADD}. -\end{itemize} -%Fetch-and-operate routines are described in Section \ref{sec:amo_fetch} and non-fetch routines -%described in Section \ref{sec:amo_nonfetch}. -%All atomic operations in \openshmem{} are blocking operations. 
diff --git a/_deprecated_sources/INTRODUCTION/changelog.tex b/_deprecated_sources/INTRODUCTION/changelog.tex deleted file mode 100644 index 67a9adbaf..000000000 --- a/_deprecated_sources/INTRODUCTION/changelog.tex +++ /dev/null @@ -1,58 +0,0 @@ -\section{Version 1.1} -This section summarizes the changes from the \openshmem specification Version 1.0 to Version 1.1. -A major change in this version is that it provides an accurate description of \openshmem interfaces so that they are in agreement with the SGI specification. This version also explains \openshmem’s programming, memory, and execution model. The document was thoroughly changed to improve the readability of the specification and usability of interfaces. The code examples were added to demonstrate the usability of the API. Additionally, diagrams were added to help understand the subtle semantic differences of various operations. - -%This section summarizes the changes from the \openshmem specification version 1.0 to the version 1.1. -%The major changes consist of more accurate descriptions of the \openshmem \acp{API} that -%were part of the original SGI specification, but were omitted in version 1.0e. This new version also provides new -%diagrams that explain the correct behavior of \openshmem synchronization interfaces in terms of ordering, synchronization, delivery and -%completion of operations. Version 1.1 also does a better job at explaining the programming model, memory model and execution model -%of \openshmem. - -The following list describes the specific changes in 1.1:%\rcomment{\\Eric: "better" is a subjective/ambiguous term, might need a term to definitively describe how it is better. i.e. is it more descriptive, more detailed, more accurate, easier to understand? etc.\\} - -\begin{itemize} -\item Clarifications of the completion semantics of memory synchronization -interfaces.\\See Section \ref{subsec:memory_order}.
-\item Clarification of the completion semantics of memory load and store -operations in context of \FUNC{shmem\_barrier\_all} and \FUNC{shmem\_barrier} -routines.\\See Section \ref{subsec:shmem_barrier_all} and \ref{subsec:shmem_barrier}. -\item Clarification of the completion and ordering semantics of \FUNC{shmem\_quiet} and \FUNC{shmem\_fence}. -\\See Section \ref{subsec:shmem_quiet} and \ref{subsec:shmem_fence}. -\item Clarifications of the completion semantics of \ac{RMA} and \ac{AMO} routines. -\\See Sections \ref{sec:rma} and \ref{sec:amo} -\item Clarifications of the memory model and the memory alignment requirements for symmetric data objects. -\\See Section \ref{subsec:memory_model}. -\item Clarification of the execution model and the definition of a \ac{PE}. -\\See Section \ref{subsec:execution_model} -\item Clarifications of the semantics of \FUNC{shmem\_pe\_accessible} and \FUNC{shmem\_addr\_accessible}. -\\See Section \ref{subsec:shmem_pe_accessible} and \ref{subsec:shmem_addr_accessible}. -\item Added an annex on interoperability with \ac{MPI}.\\See Annex \ref{sec:mpi}. -\item Added examples to the different interfaces. -\item Clarification of the naming conventions for constant in \Clang{} and \Fortran{}. -\\See Section \ref{subsec:library_constants} and \ref{subsec:shmem_wait}. -\item Added \ac{API} calls: \FUNC{shmem\_char\_p}, \FUNC{shmem\_char\_g}.\\See Sections \ref{subsec:shmem_p} and \ref{subsec:shmem_g}. -%These calls are part of the SGI specification. -\item Removed \ac{API} calls: \FUNC{shmem\_char\_put}, \FUNC{shmem\_char\_get}.\\ See Sections \ref{subsec:shmem_put} and \ref{subsec:shmem_get}. -\item The usage of \VAR{ptrdiff\_t}, \VAR{size\_t}, and \VAR{int} in the interface signature - was made consistent with the description.\\See Sections \ref{subsec:coll}, \ref{subsec:shmem_iput}, and \ref{subsec:shmem_iget}. -%Revisions from public comment -\item Revised \FUNC{shmem\_barrier} example.\\See Section \ref{subsec:shmem_barrier}. 
-\item Clarification of the initial value of \VAR{pSync} work arrays for \FUNC{shmem\_barrier}.\\ See Section \ref{subsec:shmem_barrier}. -\item Clarification of the expected behavior when multiple \FUNC{start\_pes} calls are encountered.\\See Section \ref{subsec:start_pes}. -\item Corrected the definition of the atomic increment operation.\\See Section \ref{subsec:shmem_inc}. -%\item Removed malloc_error from shmalloc in Specification 1.1 under Section 8.2.1. -\item Clarification of the size of the symmetric heap and when it is set.\\See Section \ref{subsec:shfree}. -\item Clarification of the integer and real sizes for \Fortran{} \ac{API}.\\See Sections \ref{subsec:shmem_add}, \ref{subsec:shmem_cswap}, \ref{subsec:shmem_swap}, \ref{subsec:shmem_finc}, \ref{subsec:shmem_inc}, and \ref{subsec:shmem_fadd}. -\item Clarification of the expected behavior on program \OPR{exit}.\\ See Section \ref{subsec:execution_model}, Execution Model. -\item More detailed description for the progress of \openshmem operations provided.\\ See Section \ref{subsec:progress}. -\item Clarification of naming convention for non-standard interfaces and their inclusion in \FUNC{shmemx.h}.\\ See Section \ref{subsec:bindings}. -\item Various fixes to \openshmem code examples across the specification to include appropriate header files. -%\item Clarification of store operation as memory store in context of \FUNC{shmem\_fence} and {shmem\_quiet} under Sections 8.7.1 and 8.7.2. Duplicate -\item Removing requirement that implementations should detect size mismatch and return error information for \FUNC{shmalloc} and ensuring consistent language.\\See Section \ref{subsec:shfree} and Annex \ref{sec:undefined}. -\item Fortran programming fixes for examples.\\ See Sections \ref{subsec:shmem_reductions} and \ref{subsec:shmem_wait}.
-\item Clarifications of the reuse of \VAR{pSync} and \VAR{pWork} across collectives.\\See Sections \ref{subsec:coll}, \ref{subsec:shmem_broadcast}, \ref{subsec:shmem_collect} and \ref{subsec:shmem_reductions}. -\item Name changes for UV and ICE for SGI systems.\\See Annex \ref{sec:openshmem_history}. -%\item Removed XOR reduction for REAL and COMPLEX data types under Section 8.5.5. (Issue #93) -%These calls are not part of the SGI specification. -\end{itemize} diff --git a/_deprecated_sources/INTRODUCTION/changelog1.2.tex b/_deprecated_sources/INTRODUCTION/changelog1.2.tex deleted file mode 100644 index 4f4aa7070..000000000 --- a/_deprecated_sources/INTRODUCTION/changelog1.2.tex +++ /dev/null @@ -1,40 +0,0 @@ -\section{Version 1.2} -This section summarizes the changes from the \openshmem specification Version 1.1 to Version 1.2. -A major change in this version is that it improves upon the execution model described in 1.1 by introducing an explicit \FUNC{shmem\_finalize} library call. This provides a collective mechanism of exiting an \openshmem program and releasing resources used by the library. - - - -The following list describes the specific changes in 1.2: -\begin{itemize} -\item Added specification of \VAR{pSync} initialization for all routines that use it. -\item Replaced all placeholder variable names \VAR{target} with \VAR{dest} to avoid confusion with Fortran `target' keyword. -\item New Execution Model for exiting/finishing OpenSHMEM programs. -\\See Section \ref{subsec:execution_model}. -\item New library constants to support API that query version and name information. -\\See Section \ref{subsec:library_constants}. -\item New API \FUNC{shmem\_init} to provide mechanism to start an \openshmem program and replace deprecated \FUNC{start\_pes}. -\\See Section \ref{subsec:shmem_init}. -\item Deprecation of \FUNC{\_my\_pe} and \FUNC{\_num\_pes} routines. -\\See Sections \ref{subsec:shmem_my_pe} and \ref{subsec:shmem_num_pe}.
-\item New API \FUNC{shmem\_finalize} to provide collective mechanism to cleanly exit an \openshmem program and release resources. -\\See Section \ref{subsec:shmem_finalize}. -\item New API \FUNC{shmem\_global\_exit} to provide mechanism to exit an \openshmem program. -\\See Section \ref{subsec:shmem_global_exit}. -\item Clarification related to the address of the referenced object in \FUNC{shmem\_ptr}. -\\See Section \ref{subsec:shmem_ptr}. -\item New API to query the version and name information. -\\See Section \ref{subsec:shmem_version} and \ref{subsec:shmem_name}. -\item \openshmem library API normalization. All C symmetric memory management API begins with \FUNC{shmem\_}. -\\See Section \ref{subsec:shfree}. -\item Notes and clarifications added to \FUNC{shmem\_malloc}. -\\See Section \ref{subsec:shfree}. -\item Deprecation of Fortran API routine \FUNC{SHMEM\_PUT}. -\\See Section \ref{subsec:shmem_put}. -\item Clarification related to \FUNC{shmem\_wait}. -\\See Section \ref{subsec:shmem_wait}. -\item Undefined behavior for null pointers without zero counts added. -\\See Section \ref{sec:undefined} -\item Addition of new Annex for clearly specifying deprecated API and its support in the existing specification version. -\\See Section \ref{sec:dep_api}. - -\end{itemize} diff --git a/_deprecated_sources/INTRODUCTION/collective_intro.tex b/_deprecated_sources/INTRODUCTION/collective_intro.tex deleted file mode 100644 index d34246220..000000000 --- a/_deprecated_sources/INTRODUCTION/collective_intro.tex +++ /dev/null @@ -1,53 +0,0 @@ -%Comments for Manju: -%Are Barrier_all, shmalloc, etc considered collectives? If yes, we need to state that all PEs belong to an implicit active set that contains all PES. -%Also describe the case where collective operations may be invoked by the implicit active set (all PEs) or active sets PEs -%State that collectives can be executed on statements that are not ordered . 
-%State that from the beginning to the end of the program, the sequence of collectives should be the same on a given active set (or implicit active set). -%Are arguments the same for PEs that call the same collective? (i.e. target or source symmetric data?) -%Which collectives imply synchronization (i.e. barrier, quiet, etc) which ones not (i.e. broadcast on root?) - -\emph{Collective routines} are defined as communication or synchronization operations -on a group of \acp{PE} called an \activeset{}. The collective routines require all -\acp{PE} in the \activeset{} to simultaneously call the routine. -A \ac{PE} that is not part of the \activeset{} calling the collective -routines results in undefined behavior. All -collective routines have an \activeset{} as an input parameter except -\barrierall{}. The \barrierall{} is called by all \acp{PE} of the \openshmem{} program. - -The \activeset{} is defined by the arguments \VAR{PE\_start}, \VAR{logPE\_stride}, -and \VAR{PE\_size}. \VAR{PE\_start} is the starting \ac{PE} number, \VAR{logPE\_stride} is the log -(base 2) of the stride between \acp{PE}, and \VAR{PE\_size} is -the number of \acp{PE} participating in the \activeset{}. All \acp{PE} participating in the -collective routines provide the same values for these arguments. - -Another argument important to collective routines is \VAR{pSync}, which is a -symmetric work array. All \acp{PE} participating in a collective must pass the same -\VAR{pSync} array. On completion of a collective call, the \VAR{pSync} is restored to its -original contents. The user is permitted to reuse a \VAR{pSync} array if -all previous collective routines using the \VAR{pSync} array have been completed by all participating -\acp{PE}. One can use a synchronization collective routine such as \barrier{} -to ensure completion of previous collective routines.
The \FUNC{shmem\_barrier} routine allows the same \VAR{pSync} array to be used on consecutive calls as long as the \ac{PE} \activeset{} does not change. - -%The two cases below -%show the reuse of \VAR{pSync} array: -% -%\begin{itemize} -%\item The \FUNC{shmem\_barrier} function allows the same \VAR{pSync} array to be used on consecutive calls as long as the active \ac{PE} set does not change. -%\item If the same collective function is called multiple times with the -% same \activeset, the calls may alternate between two \VAR{pSync} arrays. -% The \openshmem functions guarantee that a first call is completely finished by -% all \ac{PE}s by the time processing of a third call begins on any \ac{PE}. -%\end{itemize} - - -All collective routines defined in the specification are blocking. The -collective routines return on completion. The collective routines defined in the \openshmem{} specification -are: - -\begin{itemize} -\item[] \broadcast{} -\item[] \barrier{} -\item[] \barrierall{} -\item[] \collect{} -\item[] \reduction{} -\end{itemize} diff --git a/_deprecated_sources/INTRODUCTION/compile_exec_applications.tex b/_deprecated_sources/INTRODUCTION/compile_exec_applications.tex deleted file mode 100644 index d87463dcf..000000000 --- a/_deprecated_sources/INTRODUCTION/compile_exec_applications.tex +++ /dev/null @@ -1,61 +0,0 @@ - -As of this writing, the \openshmem{} specification is silent regarding how \openshmem{} programs are compiled, linked and run. This section shows some examples of -how wrapper programs are utilized in the \openshmem{} Reference Implementation to compile and launch programs. -%The commands are styled after wrapper programs found in many MPI implementations. 
-\section{Compilation} -\subsection*{Programs written in \Clang} - -The \openshmem{} Reference Implementation provides a wrapper program named \textbf{oshcc} to aid in the compilation of \Clang{} programs. The wrapper -could be called as follows: - -\begin{lstlisting}[language=bash] -oshcc <compiler options> -o myprogram myprogram.c -\end{lstlisting} -Where the $\langle\mbox{compiler options}\rangle$ are options understood by the underlying \Clang{} compiler. - - -\subsection*{Programs written in \Cpp} - -The \openshmem{} Reference Implementation provides a wrapper program named \textbf{oshCC} to aid in the compilation of \Cpp{} programs. The -wrapper could be called as follows: - -\begin{lstlisting}[language=bash] -oshCC <compiler options> -o myprogram myprogram.cpp -\end{lstlisting} -Where the $\langle\mbox{compiler options}\rangle$ are options understood by the underlying \Cpp{} compiler called by \textbf{oshCC}. - - -\subsection*{Programs written in \Fortran} - -The \openshmem{} Reference Implementation provides a wrapper program named \textbf{oshfort} to aid in the compilation of \Fortran{} programs. -The wrapper could be called as follows: - -\begin{lstlisting}[language=bash] -oshfort <compiler options> -o myprogram myprogram.f -\end{lstlisting} -Where the $\langle\mbox{compiler options}\rangle$ are options understood by the underlying \Fortran{} compiler called by \textbf{oshfort}.
- -\section{Running Programs} - -The \openshmem{} Reference Implementation provides a wrapper program named \textbf{oshrun} to launch \openshmem programs. The wrapper could -be called as follows: - -\begin{lstlisting}[language=bash] -oshrun <additional options> -np <#> <program> <program arguments> -\end{lstlisting} -The program arguments for \textbf{oshrun} are: - -\begin{tabular}{p{0.3\textwidth}p{0.6\textwidth}} -$\langle\mbox{additional options}\rangle$ & {Options passed to the underlying launcher.}\tabularnewline --np $\langle\mbox{\#}\rangle$ & {The number of \ac{PE}s to be used in the execution.}\tabularnewline -$\langle\mbox{program}\rangle$ & {The program executable to be launched.}\tabularnewline -$\langle\mbox{program arguments}\rangle$ & {Flags and other parameters to pass to the program.}\tabularnewline -\end{tabular} - -%\begin{description} -%\item[$\langle\mbox{additional options}\rangle$] options passed to the underlying launcher -%\item[-np $\langle\mbox{\#}\rangle$] The number of processing elements (PEs) to be used -%in the execution. -%\item [$\langle\mbox{program}\rangle$] The program executable to be launched -%\item [$\langle\mbox{program arguments}\rangle$] flags and other parameters to pass to the program -%\end{description} diff --git a/_deprecated_sources/INTRODUCTION/deprecated_API.tex b/_deprecated_sources/INTRODUCTION/deprecated_API.tex deleted file mode 100644 index 0c962072d..000000000 --- a/_deprecated_sources/INTRODUCTION/deprecated_API.tex +++ /dev/null @@ -1,20 +0,0 @@ -For the \openshmem Specification(s), deprecation is the process of identifying API that is supported but no longer recommended for use by program users. For \openshmem library users, said API \textbf{must} be supported until clearly indicated as otherwise by the Specification. In this chapter we will record the API that has been deprecated, the \openshmem Specification that effected the deprecation, and if the feature is supported in the current version of the specification.
- -\begin{center} -\begin{tabular}{|l|c|c|c|} - \hline - \textbf{Deprecated API} & \textbf{Deprecated Since} & \textbf{Currently Supported(?)} & \textbf{Replaced By}\\ \hline %There may be better table headings... - \FUNC{\_my\_pe} & 1.2 & Yes & \FUNC{shmem\_my\_pe} \\ \hline - \FUNC{\_num\_pes} & 1.2 & Yes & \FUNC{shmem\_n\_pes} \\ \hline - \FUNC{shmalloc} & 1.2 & Yes & \FUNC{shmem\_malloc} \\ \hline - \FUNC{shfree} & 1.2 & Yes & \FUNC{shmem\_free} \\ \hline - \FUNC{shrealloc} & 1.2 & Yes & \FUNC{shmem\_realloc} \\ \hline - \FUNC{shmemalign} & 1.2 & Yes & \FUNC{shmem\_align} \\ \hline - \FUNC{start\_pes} & 1.2 & Yes & \FUNC{shmem\_init} \\ \hline - \FUNC{SHMEM\_PUT} & 1.2 & Yes & \FUNC{SHMEM\_PUT8} or \FUNC{SHMEM\_PUT64} \\ \hline - - - - \hline - \end{tabular} -\end{center} diff --git a/_deprecated_sources/INTRODUCTION/deprication.tex b/_deprecated_sources/INTRODUCTION/deprication.tex deleted file mode 100644 index b294a3fff..000000000 --- a/_deprecated_sources/INTRODUCTION/deprication.tex +++ /dev/null @@ -1,7 +0,0 @@ -All of these routines are deprecated and are provided for backwards -compatibility. -Implementations must include all items in this section -and the routines should function properly, while notifying the user -about deprecation of the functionality. - -%\rcomment{Do we want to change the requirement from ``must" to ``optional"} diff --git a/_deprecated_sources/INTRODUCTION/environment_variables.tex b/_deprecated_sources/INTRODUCTION/environment_variables.tex deleted file mode 100644 index b138ed7f3..000000000 --- a/_deprecated_sources/INTRODUCTION/environment_variables.tex +++ /dev/null @@ -1,26 +0,0 @@ - -\section{Environment Variables } - -The \openshmem specification provides a set of environment variables that allows users -to configure the \openshmem implementation, and receive information about the -implementation. The implementations of the specification are free to define additional variables. 
Currently, the specification defines four environment variables. - -\medskip{} - - -\begin{tabular}{|l|l|l|} -\hline -Variable & Value & Routine\tabularnewline -\hline -\hline -\texttt{SMA\_VERSION} & any & print the library version at start-up\tabularnewline -\hline -\texttt{SMA\_INFO} & any & print helpful text about all these environment variables\tabularnewline -\hline -\texttt{SMA\_SYMMETRIC\_SIZE} & non-negative integer & number of bytes to allocate for symmetric heap\tabularnewline -\hline -\texttt{SMA\_DEBUG} & any & enable debugging messages\tabularnewline -\hline -\end{tabular} - -\medskip{} diff --git a/_deprecated_sources/INTRODUCTION/examples.tex b/_deprecated_sources/INTRODUCTION/examples.tex deleted file mode 100644 index 3ce534f9b..000000000 --- a/_deprecated_sources/INTRODUCTION/examples.tex +++ /dev/null @@ -1,24 +0,0 @@ -% Custom program listings for example code -% Features: -% - line numbers -% - reduced font size (so they can fit on one page) -\lstset{ - numbers=left, - numberstyle=\tiny, - stepnumber=1, - numbersep=5pt, - numberblanklines=true, - basicstyle=\scriptsize\ttfamily, - keywordstyle=\scriptsize\color{RoyalBlue}\ttfamily\bfseries, - stringstyle=\scriptsize, - commentstyle=\scriptsize, - identifierstyle=\scriptsize\color{black}\ttfamily\bfseries -} - - -\input{INTRODUCTION/examples_C.tex} - -\pagebreak{} - - -\input{INTRODUCTION/examples_Fortran.tex} diff --git a/_deprecated_sources/INTRODUCTION/examples_C.tex b/_deprecated_sources/INTRODUCTION/examples_C.tex deleted file mode 100644 index 0858a1e72..000000000 --- a/_deprecated_sources/INTRODUCTION/examples_C.tex +++ /dev/null @@ -1,33 +0,0 @@ -\subsection{C Language Examples} - -\lstinputlisting[caption={Program that is a trivial Hello World.},label={HelloW},language={C++}]{C/helloworld.c} - -\lstinputlisting[caption={Program that implements a Circular Shift.},label={CircSh},language={C++}]{C/circ.c} - -\pagebreak{} - -\lstinputlisting[caption={Program that demonstrates the use of 
shmalloc.},label={shamalloc},language={C++}]{C/shmalloc.c} - -\lstinputlisting[caption={Program that implements Ping.},label={ping},language={C++}]{C/ping.c} - -\pagebreak{} - -\lstinputlisting[caption={Program that uses the MAX reduction.},label={max},language={C++}]{C/reduce-max.c} - -\pagebreak{} - -\lstinputlisting[caption={Program that makes use of strided puts.},label={iput},language={C++}]{C/iput.c} - -\pagebreak{} - -\lstinputlisting[caption={Program that implements an ALL-2-ALL (header)},label={all2all_head},language={C++}]{C/bench.h} - -\pagebreak{} - -\lstinputlisting[caption={Program that implements an ALL-2-ALL (main)},label={all2allMain},language={C++}]{C/all2all_main.c} - -\pagebreak{} - -\lstinputlisting[caption={Program that implements an ALL-2-ALL (subs)},label={all2allSub},language={C++}]{C/all2all_subs.c} - -\lstinputlisting[caption={Program that computes Pi},label={compute-pi},language={C++}]{C/pi.c} diff --git a/_deprecated_sources/INTRODUCTION/examples_Fortran.tex b/_deprecated_sources/INTRODUCTION/examples_Fortran.tex deleted file mode 100644 index 9f3a629e9..000000000 --- a/_deprecated_sources/INTRODUCTION/examples_Fortran.tex +++ /dev/null @@ -1,4 +0,0 @@ - -\subsection{Fortran Language Examples} - -\lstinputlisting[caption={Hello World program},label={helloworldf},language={Fortran}]{Fortran/hello.f90} diff --git a/_deprecated_sources/INTRODUCTION/execution_model.tex b/_deprecated_sources/INTRODUCTION/execution_model.tex deleted file mode 100644 index efff8ff05..000000000 --- a/_deprecated_sources/INTRODUCTION/execution_model.tex +++ /dev/null @@ -1,127 +0,0 @@ -%Outline -%%Exectution model -% *Define what is a OpenSHMEM program: a set of processes (either SPMD or MIMD?) where each process has its own 'local' (private) memory and symmetric memory regions that may be accessible by any PEs. -% *Each OpenSHMEM process is called a processing element (PE) -% *Each PE may be mapped to many to one hardware cores/threads or less. 
-% *The number of PEs is specified at launch/runtime. -% *Each PE must call startpe to initialize the OpenSHMEM runtime, before any other call for OpenSHMEM. There is an implicit barrier at startpe. -% *Each PE executes asynchronously following Fortran or program execution in C [ISO/IEC00 Sec. 5.1.2.3] -% *Each PE will have a unique global identifier and the execution of a program may depend on the PE id, if executed in SPMD. -% *PE id may be used for library calls synchronizations, control flow constructs language in C/Fortran -% *PE may allocate symmetric data objects via a symmetric heap during execution%SP: Does not cover global and static. -% *As of now, PEs may finish execution at any time by returning from the main routine. (no call to shmem_finalize yet!) -% -%This comes from the UPC spec: -%The memory consistency model in a language defines the order in which the results of write operations may be observed through read operations. -%The behavior of a OpenSHMEM program may depend on the timing of accesses to symmetric variables on PEs, so in general a program defines a set of possible executions, -%rather than a single execution. The memory consistency model constrains the set of possible executions for a given program; the user may then rely -%on properties that are true of all of those executions. - - -\section{Execution Model} -%\openshmem can use a single process multiple data (SPMD) or MIMD -%parallelism. An \openshmem application makes use of multiple processors, -%referred to as Processing Elements or PEs, to complete operations -%in parallel. -Although \openshmem follows the SPMD execution model, different \ac{PE}s may have different execution paths and will execute asynchronously following \Fortran{} or program execution in \Clang. Each \ac{PE} may be mapped to many to one hardware cores/threads or less. In \openshmem the number of \ac{PE}s is specified at runtime. 
- -\openshmem requires initialization before using any of the \openshmem library -routines by calling \textbf{start\_pes()}. %during the initialization phase of a program. %SP:repetitive. -The \ac{PE}s do not exist until after \FUNC{start\_pes} returns. \FUNC{start\_pes} performs any required initialization steps, such as setting up the symmetric heap for every \ac{PE} and creating and assigning \ac{PE} numbers which act like unique global identifiers for the duration of the program. These \ac{PE} identifiers are integers assigned in a monotonically increasing manner from zero to the total number of \ac{PE}s minus 1. \ac{PE} identifiers are used on \openshmem library calls (e.g., to access symmetric objects from specific \ac{PE}s, collective synchronization, etc.) or to dictate a definite control flow for \ac{PE}s using constructs of \Clang{} or \Fortran. Some collective routines require the creation of an \activeset, which is a group of \ac{PE}s that is involved in the execution of a collective routine. These collective routines assume that only \ac{PE}s in the \activeset{} call the routine. If a \ac{PE} not in the \activeset{} calls an \openshmem collective routine, undefined behavior results. -An OpenSHMEM program consists of a set of processes, called \ac{PE}s where each process has its own 'local' (private) memory and symmetric memory regions that may be accessible by any \ac{PE}. -Although \openshmem follows the \ac{SPMD} execution model, different \ac{PE}s may have different execution paths and will execute asynchronously following \Fortran{} or program execution in \Clang{} or \Cpp. Each PE may be mapped to many to one hardware cores/threads or less. In \openshmem the number of \ac{PE}s is specified at runtime. - -\openshmem requires initialization before using any of the OpenSHMEM library -routines by calling \FUNC{start\_pes}.%during the initialization phase of a program. %SP:repetitive. -The \ac{PE}s do not exist until after \FUNC{start\_pes} returns.
\FUNC{start\_pes} performs any required initialization steps, such as setting up the symmetric heap for every \ac{PE} and creating and assigning \ac{PE} numbers -which act like unique global identifiers for the duration of the program. These \ac{PE} identifiers are integers assigned in a monotonically increasing manner from zero to the total number of \ac{PE}s minus 1. \ac{PE} identifiers -are used for other \openshmem library calls (such as collective synchronization) or to dictate a definite control flow for \ac{PE}s using constructs of \Clang{} or \Fortran{}. Some collective routines require the creation of an -\activeset, which is a group of \ac{PE}s that is involved in the execution of a collective routine. These collective routines assume that only \ac{PE}s in the \activeset{} call the routine. If a \ac{PE} not in the \activeset{} calls an -\openshmem collective routine, undefined behavior results. - -%The symmetric heap is one of the memory spaces -%that is remotely accessible by all PEs. The symmetric heap is discussed -%further in the Memory Model section. The PE numbers are the -%identifiers used to refer to each of the PEs involved in the execution. -%Consistent with the SPMD nature of the \openshmem programming model is the concept of symmetric data objects. These are arrays or variables that exist with the same size, type, and relative address on all PEs. Another term for symmetric data objects is "remotely accessible data objects." In the interface definitions for \openshmem data transfer functions, one or more of the parameters are typically required to be symmetric or remotely accessible. The following kinds of data objects are symmetric: -%\begin{itemize} -% \item Fortran data objects in common blocks or with the SAVE attribute. These data objects must not be defined in a dynamic shared object (DSO). -% \item Non-stack C and C++ variables. These data objects must not be defined in a DSO.
-% \item Fortran arrays allocated with \textit{shpalloc} -% \item C and C++ data allocated by \textit{shmalloc} -%\end{itemize} -% -%Data transfer in \openshmem is possible through several one-sided put -%(for write) and get (for read) operations, as well as various collective -%routines such as broadcasts and reductions. Since the library provides the flexibility of one-sided operations, the execution pattern depends on how the programmer decides to distribute work amongst different PEs and the synchronization and ordering operations used. -% -%Query routines are available to gather information about the execution. -%\openshmem also provides synchronization routines to coordinate data -%transfers and other operations. -As of now, an \openshmem program finishes execution by returning from the main routine. -It is up to the implementation on how to handle the finalization of the -\openshmem library and any other resources initialized by the library: -there is currently no explicit call defined in the \openshmem specification. - - -\subsection{Progress of \openshmem Routines} - -The \openshmem model assumes that computation and communication are -naturally overlapped. High quality \openshmem implementations must ensure that programs exhibit %SP: Changing MUST to may as per discussion on 01/31/2014 -progression of communication both with and without \openshmem calls. -Consider a \ac{PE} that is engaged in a long computation with no \openshmem calls. -Other \ac{PE}s must be able to communicate (put/get, -collective, atomic) with that computationally-bound \ac{PE} without that \ac{PE} -issuing any explicit \openshmem calls. \openshmem communication calls involving that \ac{PE} must progress -regardless of when that \ac{PE} next engages in an \openshmem call. - -\textbf{Note to implementers:} progress will often be ensured through -the use of a dedicated progress thread in software, or through -network hardware that offloads communication handling from processors.
- -%\subsection{Using the Symmetric \VAR{Work} and \VAR{pSync} Arrays} - -%Multiple \VAR{pSync} arrays are often needed if a particular \ac{PE} calls a \openshmem -%collective function twice without intervening barrier synchronization. -%Problems would occur if some \ac{PE}s in the \activeset{} for call 2 arrive at -%call 2 before processing of call 1 is complete by all \ac{PE}s in the call 1 -%\activeset. You can use \FUNC{shmem\_barrier} or \FUNC{shmem\_barrier\_all} to -%perform a barrier synchronization between consecutive calls to \openshmem -%collective functions. There are two special cases: -%\begin{itemize} -%\item The \FUNC{shmem\_barrier} function allows the same \VAR{pSync} array to be used -% on consecutive calls as long as the active \ac{PE} set does not change. -%\item If the same collective function is called multiple times with the -% same \activeset, the calls may alternate between two \VAR{pSync} arrays. -% The \openshmem functions guarantee that a first call is completely finished by -% all \ac{PE}s by the time processing of a third call begins on -% any \ac{PE}. -%\end{itemize} -%Because the \openshmem functions restore \VAR{pSync} to its original contents, -%multiple calls that use the same \VAR{pSync} array do not require that \VAR{pSync} -%be reinitialized after the first call. - -\subsection{Atomicity Guarantees} - -\openshmem contains a number of routines that operate on symmetric data -atomically. These routines guarantee that accesses by \openshmem's -atomic routines will be exclusive, but do not guarantee exclusivity -in combination with other routines, either inside \openshmem or -outside. - -For example: during the execution of a remote integer increment -routine on a symmetric variable \VAR{x}, no other \openshmem atomic -routine may access \VAR{x}. After the increment, \VAR{x} will have -increased its value by \CONST{1} on the target \ac{PE}, at which point other -atomic routines may then modify that \VAR{x}. 
- -% %Memory model -% *Each OpenSHMEM PEs may have symmetric memory that is accessible by other PEs. -% *Symmetric memory is a region of memory where all the an instance of a data objects is replicated across PEs, have -% the same the same layout and relative offset. -% *All PEs can allocate a symmetric data objects using the symmetric heap, but they must do so as a collective operation. (is there a barrier after shmalloc?) -% *All writes to symmetric memory are relaxed (I'm not sure if this is the completion semantics) and are guaranteed to be visible to other PEs after a barrier_all, barrier(?), quiet, (what about wait? does it means iti sonly visible to me?) -% *Calls to barrier, barrier_all, quiet, wait, lock, atomics, are meant to guarantee memory consistency across PEs. -% *Read/Writes to symmetric data object may appear after startpe or after a the symmetric data object has been allocated in the symmetric heap (if it is a dynamic). -% *Operations like reduction, collect, etc guarantee memory consistency after completion(?) -% *Data races are possible in OpenSHMEM if multiple PEs write/read a symmetric data object from a single PE without proper synchronization. diff --git a/_deprecated_sources/INTRODUCTION/introduction.tex b/_deprecated_sources/INTRODUCTION/introduction.tex deleted file mode 100644 index a6a7303fd..000000000 --- a/_deprecated_sources/INTRODUCTION/introduction.tex +++ /dev/null @@ -1,10 +0,0 @@ -\section{Introduction} - -This document defines the elements of the \openshmem Application Programming -Interface~\footnote{SHMEM and \openshmem are trademarks of Silicon Graphics International Corp.}. -The purpose of the \openshmem \ac{API} is to provide programmers -with a standard interface for writing parallel programs -using \Clang, \Cpp{} and \Fortran{} with one-sided communication. 
- -More information about the \openshmem project can be found at:\\ -\url{http://www.openshmem.org/} diff --git a/_deprecated_sources/INTRODUCTION/introduction_pgas.tex b/_deprecated_sources/INTRODUCTION/introduction_pgas.tex deleted file mode 100644 index 687c2580c..000000000 --- a/_deprecated_sources/INTRODUCTION/introduction_pgas.tex +++ /dev/null @@ -1,168 +0,0 @@ - -\section{What is \openshmem?} - -This section is an introduction to previous work on \openshmem. We begin -with a quick overview of the Partitioned Global Address Space model, -which is the basis for \openshmem's data sharing strategy. - - -\subsection{Partitioned Global Address Space} - -Conventional Parallel Programming Models can be broadly classified -into 2 types: -\begin{description} -\item [{{Shared-Memory~Model:}}] in this model all processors interact -with a globally available memory space. -\item [{{Distributed-Memory~Model:}}] in this model each processor has -its own memory to work with and can only directly access the data -that resides in its memory. When a processor needs data from another -processor an explicit function call must be made to communicate with -the target processor. -\end{description} -The current high performance computing architectures prefer a combination -of the above mentioned memory models, which is referred to as \textbf{Partitioned -Global Address Space} or PGAS for short. In PGAS, each processing -element (\ac{PE}) has access to its own private local memory and also to -a shared memory space. This programming model enhances performance -by exposing data/thread locality. PGAS programming languages include -\textbf{Unified Parallel \Clang (UPC)}, \textbf{Co-Array \Fortran (CAF)}, -\textbf{Titanium}, \textbf{X-10} and \textbf{Chapel}. - -More information about PGAS can be found at the PGAS Forum website.\cite{pgasfor} - - -\subsection{\openshmem} - -% SGI asked for this to be removed to protect the trademark -% -% SHMEM stands for \textbf{SH}ared \textbf{MEM}ory.
- -\openshmem is a library \ac{API} that allows its participating processes (the -places where work occurs are called Processing Elements or \ac{PE}s) to -view a Partitioned Global Address Space. Each \ac{PE} is able to see -variables with a common name, but each \ac{PE} has its own local copy -of the variable. - -The \openshmem library provides inter-processor communication using data -passing and one-sided communication techniques. \openshmem differs from -the Message Passing Interface (MPI), currently the most widely used -communication model, in that the latter generally uses two-sided communication -(MPI now also includes one-sided calls). In two-sided communication, -both sides of the exchange (source and destination) are required to -participate actively. The one-sided communication mechanism decouples -data transfer and synchronization, reducing communication overhead, -resulting in faster communication patterns. Figure \ref{fig:Communication-Scheme} -shows diagrams for one-sided and two-sided communications.\medskip{} - - -%\begin{center} -%\begin{figure}[H] -%\begin{centering} -%\includegraphics[scale=0.7]{media/communication} -%\par\end{centering} -% -%\caption{Communication Scheme\label{fig:Communication-Scheme}} -%\end{figure} -% -%\par\end{center} - -\medskip{} -The following are some of the communication operations available in -\openshmem: -\begin{enumerate} -\item \textbf{Data Transfers } - -\begin{enumerate} -\item One-sided puts : the initiator \ac{PE} (active side) specifies the local -data to be written to the target \ac{PE}'s (passive side) memory. -\item One-sided gets : an explicit fetch operation is used to copy a variable -amount of data from a remote process and store it locally.\end{enumerate} -\begin{description} -\item [{{Note:}}] By avoiding the need for matching send and receive -calls, \openshmem simplifies the communication process by reducing the -number of calls required to have one \ac{PE} interact with other \ac{PE}s. 
-\end{description} -\item \textbf{Synchronization Mechanisms } - -\begin{enumerate} -\item Fence: Ensures ordering of PUT operations to a specific \ac{PE}. -\item Quiet: Ensures ordering of PUT operations to all \ac{PE}s. -\item Barrier: A collective synchronization routine in which no \ac{PE} may leave -the barrier prior to all \ac{PE}s entering the barrier. -\end{enumerate} -\item \textbf{Collective Communication} - -\begin{enumerate} -\item Broadcast: Copy a block of data from one \ac{PE} to one or more target -\ac{PE}s. -\item Collection: Concatenate elements from the source array to a target -array over the specified \ac{PE}s. -\item Reduction: Perform an associative binary operation over the specified -\ac{PE}s. -\end{enumerate} -\item \textbf{Address Manipulation} - -\begin{enumerate} -\item Allocating and deallocating memory blocks in the symmetric space. -\end{enumerate} -\item \textbf{Locks} - -\begin{enumerate} -\item Implementation of mutual exclusion. -\end{enumerate} -\item \textbf{Atomic Memory Operations} - -\begin{enumerate} -\item Swap, Conditional Swap, Add and Increment -\end{enumerate} -\item \textbf{Data Cache control} - -\begin{enumerate} -\item Implementation of mechanisms to exploit the capabilities of hardware -cache if available. -\end{enumerate} -\end{enumerate} -\begin{description} -\item [{{Note:}}] More information about \openshmem routines can be found -in the Library Routines section. -\end{description} - -\subsection{History of \openshmem} -\begin{description} -\item [{{Cray~SHMEM~(MP-SHMEM,~LC-SHMEM):}}] Cray first introduced -SHMEM in 1993 for its Cray T3D systems. Cray SHMEM was also used in -other models: T3E, PVP and XT series. -\item [{{SGI~SHMEM~(SGI-SHMEM):}}] Cray Research merged with Silicon -Graphics (SGI) in February 1996. At this point SHMEM was incorporated -into SGI's Message Passing Toolkit (MPT). The platforms supported -were - SGI Irix, Origin and Altix. 
-\item [{{Quadrics~SHMEM~(Q-SHMEM):}}] an optimized API for the Quadrics -QsNet interconnect. It included SGI extensions and provided non-blocking -puts and gets. A joint effort from HCS Lab \& Quadrics incorporated -a program profiling interface called PSHMEM that can aid in the execution -analysis of SHMEM programs. -\end{description} -The success of SHMEM's performance attracted several vendors to provide -implementations (with varying names and features) for their systems. -Some of them include: -\begin{description} -\item [{{HP~SHMEM:}}] Based on the Quadrics API. It is included in the -UPC product kit. -\item [{{Cyclops-64~SHMEM~(C64-SHMEM):}}] this SHMEM API supports the -Cyclops-64 architecture. Most of the core features of Cray SHMEM are -available with some additional interfaces specific to the Cyclops-64 -architecture. % -%\begin{comment} -%Forcing new line, can't get \LaTeX{} to do it automatically. -%\end{comment} - -\item [{{IBM~SHMEM:}}] An implementation created by IBM intended for -internal use only. -\item [{{TurboSHMEM:}}] This implementation uses IBM's Low-Level \ac{API} -(LAPI) technology to obtain optimized one-sided communication for -the put/get operations. This allows applications written with the -SHMEM API to run on IBM platforms with minimal source code changes. -\item [{{GPSHMEM:}}] This implementation of SHMEM aims at providing full -portability of applications. It is built mostly with Cray T3D components -and functionalities and provides MPI and ARMCI support. This project -is no longer maintained. \end{description} diff --git a/_deprecated_sources/INTRODUCTION/language_bindings.tex b/_deprecated_sources/INTRODUCTION/language_bindings.tex deleted file mode 100644 index 176f7ae50..000000000 --- a/_deprecated_sources/INTRODUCTION/language_bindings.tex +++ /dev/null @@ -1,32 +0,0 @@ -\section{Language Bindings and Conformance} -\label{subsec:bindings} -%\openshmem is available with \Clang{} and \Fortran{} bindings. 
The \Cpp{} -%interface is currently the same as that for \Clang. An \openshmem implementation can be conformant to one or both of the -%interfaces. An implementation that provides e.g.\ only a \Clang{} interface may claim to conform to the \openshmem specification with respect to -%the \Clang{} language, but not to \Fortran{} and should make this clear in its documentation. An implementation that provides both \Clang{} and \Fortran{} bindings may claim -%complete conformance. - -\openshmem provides ISO \Clang{} and \Fortran{} \textit{90} language bindings. Any implementation that provides both \Clang{} and \Fortran{} bindings can claim conformance to the specification. An implementation that provides e.g.\ only a \Clang{} interface may claim to conform to the \openshmem specification with respect to -the \Clang{} language, but not to \Fortran{}, and should make this clear in its documentation. The \openshmem header files for \Clang{} and \Fortran{} must contain only the interfaces and constant names defined in this specification. - -\openshmem{} \ac{API}s can be implemented as either routines or macros. However, implementing the interfaces using macros is strongly discouraged as this could severely limit the use of external profiling tools and high-level compiler optimizations. An \openshmem{} program should avoid defining routine names, variables, or -identifiers with the prefix \shmemprefix{} (for \Clang{} and \Fortran{}), \shmemprefixC{} (for \Clang{}) or with \openshmem \ac{API} names. - -All \openshmem extension \ac{API}s that are not part of this specification must be defined in the \FUNC{shmemx.h} include file. These extensions shall use the \FUNC{shmemx\_} prefix for all routine, variable, and constant names. - -%The \openshmem{} constants and environment variables are in all capital letters. -%All \openshmem{} functions are prefixed with \shmemprefix{}, besides these -%expections : start\_pes{}, shfree{}, shpalloc{}, shpclmove{}, shpdellc{}. 
-%\begin{itemize} -%\item start\_pes{} -%\item shfree{} -%\item shpalloc{} -%\item shpclmove{} -%\item shpdellc{} -%\end{itemize} - - -% -%The \openshmem{} -%functions do not return any error code. - diff --git a/_deprecated_sources/INTRODUCTION/library_constants.tex b/_deprecated_sources/INTRODUCTION/library_constants.tex deleted file mode 100644 index ae8325d6b..000000000 --- a/_deprecated_sources/INTRODUCTION/library_constants.tex +++ /dev/null @@ -1,113 +0,0 @@ -\section{Library Constants} -%\color{red} -%\subsection*{\sout{Constants Related To Collective Operations}} -%\sout{Below are the library constants for collective operations.} -%Ticket \#107 -%\color{black} -The constants that start with SHMEM\_* are for \Fortran{}. For backwards -compatibility, the \CorCpp constants may start with either \_SHMEM\_* or SHMEM\_*. -\newline -\newline -\begin{tabular}{|p{0.4\textwidth}|p{0.5\textwidth}|} -\hline -\textbf{Constant} & \textbf{Description} -\tabularnewline -\hline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_BCAST\_SYNC\_SIZE}} -\hbox{\hspace*{12mm} \const{SHMEM\_BCAST\_SYNC\_SIZE}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_BCAST\_SYNC\_SIZE}}} -& Length of the \VAR{pSync} arrays needed for broadcast routines. The value -of this constant is implementation specific. Refer to the \hyperref[subsec:shmem_broadcast]{Broadcast Routines} section under \hyperref[sec:openshmem_library_api]{Library Routines} for more information -about the usage of this constant. \tabularnewline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_SYNC\_VALUE}} -\hbox{\hspace*{12mm} \const{SHMEM\_SYNC\_VALUE}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_SYNC\_VALUE}}} -& Holds the value used to initialize the elements of \VAR{pSync} arrays.
The -value of this constant is implementation specific.\tabularnewline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_REDUCE\_SYNC\_SIZE}} -\hbox{\hspace*{12mm} \const{SHMEM\_REDUCE\_SYNC\_SIZE}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_REDUCE\_SYNC\_SIZE}}} -& Length of the work arrays needed for reduction routines. The value -of this constant is implementation specific. Refer to the \hyperref[subsec:shmem_reductions]{Reduction Routines} section under \hyperref[sec:openshmem_library_api]{Library Routines} for more information -about the usage of this constant.\tabularnewline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_BARRIER\_SYNC\_SIZE}} -\hbox{\hspace*{12mm} \const{SHMEM\_BARRIER\_SYNC\_SIZE}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_BARRIER\_SYNC\_SIZE}}} -& Length of the work array needed for barrier routines. The value -of this constant is implementation specific. Refer to the \hyperref[subsec:shmem_barrier]{Barrier Synchronization Routines} section under \hyperref[sec:openshmem_library_api]{Library Routines} -for more information about the usage of this constant.\tabularnewline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_COLLECT\_SYNC\_SIZE}} -\hbox{\hspace*{12mm} \const{SHMEM\_COLLECT\_SYNC\_SIZE}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_COLLECT\_SYNC\_SIZE}}} -& Length of the work array needed for collect routines. The value -of this constant is implementation specific. 
Refer to the \hyperref[subsec:shmem_collect]{Collect Routines} section under \hyperref[sec:openshmem_library_api]{Library Routines} for more information about the usage of this constant.\tabularnewline -\hline -\end{tabular} - -\begin{tabular}{|p{0.4\textwidth}|p{0.5\textwidth}|} -\hline -\tabularnewline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}} -\hbox{\hspace*{12mm} \const{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}}} -& Minimum length of work arrays used in various collective routines.\tabularnewline -\hline -%\color{red} -%\vtop{\hbox{} -%\hbox{\hspace*{12mm} \const{}} -%\hbox{} -%\hbox{\hspace*{12mm} \const{}}} -%& \color{red} -%Ticket \#107 \tabularnewline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_MAJOR\_VERSION}} -\hbox{\hspace*{12mm} \const{SHMEM\_MAJOR\_VERSION}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_MAJOR\_VERSION}}} -& -Integer representing the major version of \openshmem{} standard in use. \tabularnewline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_MINOR\_VERSION}} -\hbox{\hspace*{12mm} \const{SHMEM\_MINOR\_VERSION}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_MINOR\_VERSION}}} -& -Integer representing the minor version of \openshmem{} standard in use. \tabularnewline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_MAX\_NAME\_LEN}} -\hbox{\hspace*{12mm} \const{SHMEM\_MAX\_NAME\_LEN}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_MAX\_NAME\_LEN}}} -& -Integer representing the length of vendor string. 
\tabularnewline -\hline -\vtop{\hbox{\CorCpp:} -\hbox{\hspace*{12mm} \const{\_SHMEM\_VENDOR\_STRING}} -\hbox{\hspace*{12mm} \const{SHMEM\_VENDOR\_STRING}} -\hbox{\strut \Fortran:} -\hbox{\hspace*{12mm} \const{SHMEM\_VENDOR\_STRING}}} -& -Vendor-defined string of length less than \const{SHMEM\_MAX\_NAME\_LEN}. \tabularnewline -\hline - -\end{tabular} -\color{black} diff --git a/_deprecated_sources/INTRODUCTION/locks_intro.tex b/_deprecated_sources/INTRODUCTION/locks_intro.tex deleted file mode 100644 index 08ef9665e..000000000 --- a/_deprecated_sources/INTRODUCTION/locks_intro.tex +++ /dev/null @@ -1,3 +0,0 @@ -The following section discusses \openshmem locks as a mechanism to provide mutual exclusion. Three routines are available for distributed locking, -%namely, -\textit{set, test} and \textit{clear}. \ No newline at end of file diff --git a/_deprecated_sources/INTRODUCTION/memory_management_intro.tex b/_deprecated_sources/INTRODUCTION/memory_management_intro.tex deleted file mode 100644 index b204b7385..000000000 --- a/_deprecated_sources/INTRODUCTION/memory_management_intro.tex +++ /dev/null @@ -1 +0,0 @@ -\openshmem provides a set of \ac{API}s for managing the symmetric heap. The \ac{API}s allow one to dynamically allocate, deallocate, reallocate and align symmetric data objects in the symmetric heap, in \Clang{} and \Fortran. diff --git a/_deprecated_sources/INTRODUCTION/memory_model.tex b/_deprecated_sources/INTRODUCTION/memory_model.tex deleted file mode 100644 index 5c4859c34..000000000 --- a/_deprecated_sources/INTRODUCTION/memory_model.tex +++ /dev/null @@ -1,68 +0,0 @@ -%Outline -%%Execution model -% *Define what an OpenSHMEM program is: a set of processes (either SPMD or MIMD?) where each process has its own 'local' (private) memory and symmetric memory regions that may be accessible by any PEs. -% *Each OpenSHMEM process is called a processing element (PE) -% *Each PE may be mapped to many to one hardware cores/threads or less.
-% *The number of PEs is specified at launch/runtime. -% *Each PE must call startpe to initialize the OpenSHMEM runtime, before any other call for OpenSHMEM. There is an implicit barrier at startpe. -% *Each PE executes asynchronously following Fortran or program execution in C [ISO/IEC00 Sec. 5.1.2.3] -% *Each PE will have a unique global identifier and the execution of a program may depend on the PE id, if executed in SPMD. -% *PE id may be used for library calls synchronizations, control flow constructs language in C/Fortran -% *PE may allocate symmetric data objects via a symmetric heap -% *As of now, PEs may finish execution at any time by returning from the main function. (no call to shmem_finalize yet!) -% -% %Memory model -% *Each OpenSHMEM PEs may have symmetric memory that is accessible by other PEs. -% *Symmetric memory is a region of memory where all the an instance of a data objects is replicated across PEs, have -% the same the same layout and relative offset. -% *All PEs can allocate a symmetric data objects using the symmetric heap, but they must do so as a collective operation. (is there a barrier after shmalloc?) -% *All writes to symmetric memory are relaxed (I'm not sure if this is the completion semantics) and are guaranteed to be visible to other PEs after a barrier_all, barrier(?), quiet, (what about wait? does it means iti sonly visible to me?) -% *Calls to barrier, barrier_all, quiet, wait, lock, atomics, are meant to guarantee memory consistency across PEs. -% *Read/Writes to symmetric data object may appear after startpe or after a the symmetric data object has been allocated in the symmetric heap (if it is a dynamic). -% *Operations like reduction, collect, etc guarantee memory consistency after completion(?) -% *Data races are possible in OpenSHMEM if multiple PEs write/read a symmetric data object from a single PE without proper synchronization. 
-% -%This comes from the UPC spec: -%The memory consistency model in a language defines the order in which the results of write operations may be observed through read operations. -%The behavior of a OpenSHMEM program may depend on the timing of accesses to symetric variables on PEs, so in general a program defines a set of possible executions, -%rather than a single execution. The memory consistency model constrains the set of possible executions for a given program; the user may then rely -%on properties that are true of all of those executions. - -\section{Memory Model} -\begin{figure}[h] -\includegraphics[width=0.95\textwidth]{diagrams/updated/mem_model} -\caption{\OSH{} Memory Model} -\label{fig:mem_model} -\end{figure} -An \openshmem program consists of data objects that are private to each \ac{PE} and data objects that are remotely accessible by all \ac{PE}s. Private data objects are stored in the local memory of each \ac{PE} and can only be accessed by the \ac{PE} itself; these data objects cannot be accessed by other \ac{PE}s via \openshmem routines. Private data objects follow the memory model of \Clang{} or \Fortran{}. Remotely accessible objects, however, can be accessed by remote \ac{PE}s using \openshmem routines. -Remotely accessible data objects are called \emph{Symmetric Objects}. -%An object is symmetric if it has a corresponding object with the same -%SP: No, if there is a way to create such objects without them being global/static/common/save or shmalloced/shpalloced would they would be symmetric as per this definition...NOT the way OpenSHMEM defines. -%All symmetric data objects have a corresponding object with the same -%name, type, size, and offset (from an arbitrary memory address) on all \ac{PE}s. Symmetric objects are accessible by all executing \ac{PE}s via the \openshmem \ac{API}. 
-Each symmetric data object has a corresponding object with the same name, type, size, and offset (from an arbitrary memory address) on all PEs where that object is accessible via the \openshmem \ac{API}. (For the definition of what is accessible, see the descriptions for \FUNC{shmem\_pe\_accessible} and \FUNC{shmem\_addr\_accessible} in sections \ref{subsec:shmem_pe_accessible} and \ref{subsec:shmem_addr_accessible}.) -Symmetric data objects accessed via typed \openshmem interfaces are required to be natural aligned based on their type requirements and underlying architecture. -In \openshmem{} the following kinds of data objects are symmetric: -\begin{itemize} - \item \Fortran{} data objects in common blocks or with the SAVE attribute. These data objects must not be defined in a dynamic shared object (DSO). - \item Global and static \Clang{} and \Cpp{} variables. These data objects must not be defined in a DSO. - \item \Fortran{} arrays allocated with \textit{shpalloc} - \item \Clang{} and \Cpp{} data allocated by \textit{shmem\_malloc} -\end{itemize} - -%Symmetric Objects -%are static and global variables in \Clang{} and \Cpp, which are often allocated -%at the same address on all \ac{PE}s where the program is being executed -%(\emph{e.g.} in the ELF executable format). -%See Figure \ref{fig:SymmetricHeap1} -%for an example of how Symmetric Memory Objects may be arranged in -%memory. -\openshmem dynamic memory allocation routines (\textit{shpalloc} and \textit{shmem\_malloc}) allow collective allocation of \emph{Symmetric Data Objects} on a special memory region called the \emph{Symmetric Heap}. The Symmetric Heap is created during the execution of a program at a memory location determined by the implementation. The Symmetric Heap may reside in different memory regions on different \ac{PE}s. 
Figure~\ref{fig:mem_model} shows how \openshmem implements a \ac{PGAS} model using remotely accessible symmetric objects and private data objects when executing an \openshmem program. Symmetric data objects are stored on the symmetric heap or in the global/static memory section of each \ac{PE}. - -%Symmetric data objects can be allocated dynamically in the symmetric heap of each \ac{PE} using -%a collective \FUNC{shmalloc} or \FUNC{shpalloc} memory allocation call. - -%\openshmem specification does not require a particular memory layout; it is up to the implementation -%to decide how to implement the symmetric heap. -%Objects that reside in the private address space can only be accessed by the \ac{PE} itself; these data objects -%cannot be accessed by other \ac{PE}s via \openshmem routines. diff --git a/_deprecated_sources/INTRODUCTION/mpi_interoperability.tex b/_deprecated_sources/INTRODUCTION/mpi_interoperability.tex deleted file mode 100644 index c90be3690..000000000 --- a/_deprecated_sources/INTRODUCTION/mpi_interoperability.tex +++ /dev/null @@ -1,11 +0,0 @@ -\section{\ac{MPI} Interoperability} -\begin{sloppypar} %SP: to prevent constants from running into margins. -\openshmem routines can be used in conjunction with \ac{MPI} -routines in the same program. For example, on SGI systems, programs that use both \ac{MPI} and \openshmem routines call \FUNC{MPI\_Init} and \FUNC{MPI\_Finalize} but omit the call to the \FUNC{shmem\_init} routine. \openshmem \ac{PE} numbers are equal to the \ac{MPI} rank within the \CONST{MPI\_COMM\_WORLD} communicator. Note that this precludes use of \openshmem routines between processes in different \CONST{MPI\_COMM\_WORLD}s. -\ac{MPI} processes started using the \FUNC{MPI\_Comm\_spawn} routine, for -example, cannot use \openshmem routines to communicate with their parent -\ac{MPI} processes.
-\end{sloppypar} -On SGI systems where \ac{MPI} jobs use TCP/sockets for inter-host communication, \openshmem routines can be used to communicate with processes running on the same host. The \FUNC{shmem\_pe\_accessible} routine can be used to determine if a remote \ac{PE} is accessible via \openshmem communication from the local \ac{PE}. When running an \ac{MPI} program involving multiple executable files, \openshmem routines can be used to communicate with processes running from the same or different executable files, provided that the communication is limited to symmetric data objects. On these systems, static memory such as a \Fortran{} common block or \Clang{} global variable, is symmetric between processes running from the same executable file, but is not symmetric between processes running from different executable files. Data allocated from the symmetric heap (\FUNC{shmem\_malloc} or \FUNC{shpalloc}) is symmetric across the same or different executable files. The routine \FUNC{shmem\_addr\_accessible} can be used to determine if a local address is accessible via \openshmem communication from a remote \ac{PE}. - - Another important feature of these systems is that the \FUNC{shmem\_pe\_accessible} routine returns \CONST{TRUE} only if the remote \ac{PE} is a process running from the same executable file as the local PE, indicating that full \openshmem support (static memory and symmetric heap) is available. When using \openshmem routines within an \ac{MPI} program, the use of \ac{MPI} memory placement environment variables is required when using non-default memory placement options. 
diff --git a/_deprecated_sources/INTRODUCTION/new_execution_model.tex b/_deprecated_sources/INTRODUCTION/new_execution_model.tex deleted file mode 100644 index 0fa0e6adf..000000000 --- a/_deprecated_sources/INTRODUCTION/new_execution_model.tex +++ /dev/null @@ -1,141 +0,0 @@ -%Outline -%%Exectution model -% *Define what is a OpenSHMEM program: a set of processes (either SPMD or MIMD?) where each process has its own 'local' (private) memory and symmetric memory regions that may be accessible by any PEs. -% *Each OpenSHMEM process is called a processing element (PE) -% *Each PE may be mapped to many to one hardware cores/threads or less. -% *The number of PEs is specified at launch/runtime. -% *Each PE must call startpe to initialize the OpenSHMEM runtime, before any other call for OpenSHMEM. There is an implicit barrier at startpe. -% *Each PE executes asynchronously following Fortran or program execution in C [ISO/IEC00 Sec. 5.1.2.3] -% *Each PE will have a unique global identifier and the execution of a program may depend on the PE id, if executed in SPMD. -% *PE id may be used for library calls synchronizations, control flow constructs language in C/Fortran -% *PE may allocate symmetric data objects via a symmetric heap during execution%SP: Does not cover global and static. -% *As of now, PEs may finish execution at any time by returning from the main function. (no call to shmem_finalize yet!) -% -%This comes from the UPC spec: -%The memory consistency model in a language defines the order in which the results of write operations may be observed through read operations. -%The behavior of a OpenSHMEM program may depend on the timing of accesses to symetric variables on PEs, so in general a program defines a set of possible executions, -%rather than a single execution. The memory consistency model constrains the set of possible executions for a given program; the user may then rely -%on properties that are true of all of those executions. 
- - -\section{Execution Model} -\label{subsec:execution_model} -%An \openshmem{} program consists of a set of processes, called \ac{PE}s, that execute in a \ac{SPMD}-like execution model. In \openshmem different \ac{PE}s can have different execution paths and will execute asynchronously -%following \Fortran{} or \Clang{} program execution. The \ac{PE}s progress independently, and can communicate and synchronize using the \openshmem{} \ac{API}. -%The number of \ac{PE}s in the \openshmem{} program is specified at runtime by the user. %A \ac{PE} can be implemented as an OS process or OS thread~\footnote{As long as the memory model and execution model of \openshmem is followed.}. -%The total number of \ac{PE}s, \VAR{N}, can be mapped to \VAR{M} hardware cores/threads where \VAR{M} can be less or equal than \VAR{N}. %As long as the memory model and execution model of \openshmem is followed. -%An \openshmem program must start by calling the initialization function \FUNC{start\_pes} before using any of the other \openshmem library routines. \ac{PE}s do not exist until after the call to \FUNC{start\_pes} returns. -%During execution, each \ac{PE} is assigned a unique global identifier for the duration of the program. These \ac{PE} identifiers are integers assigned in a monotonically increasing manner from zero to the total number of \ac{PE}s minus 1. \ac{PE} identifiers are used for other \openshmem library calls (e.g. to access symmetric objects from specific \ac{PE}s, collective synchronization) or to dictate a control flow for \ac{PE}s using constructs of -%\Clang{} or \Fortran. As of now, an \openshmem program finishes execution by returning from the main function. It is up to the implementation on how to handle the finalization of the \openshmem library and any other -%resources initialized by the library: there is currently no explicit finalization call defined in the \openshmem specification. 
- -An \openshmem{} program consists of a set of \openshmem{} processes called \ac{PE}s that execute in a \ac{SPMD}-like model where each \ac{PE} can take a different execution path. A \ac{PE} can be implemented using an OS process or an OS thread\footnote{Implementing \ac{PE}s using OS threads requires compiler techniques to implement the \openshmem{} memory model.}. The \ac{PE}s progress asynchronously, and can communicate/synchronize -via the \openshmem{} interfaces. All \ac{PE}s in an \openshmem{} program should start by calling the initialization routine \FUNC{shmem\_init} \footnote{\textbf{start\_pes} has been deprecated as of Specification 1.2} before using any of the other \openshmem{} library routines. -%SP: Spec 1.2, ticket 108, text from Oscar. -%As of now, an \openshmem program finishes execution by returning from the main function. On program exit, \openshmem must complete all pending communication and release all the resources associated to the library using an implicit collective synchronization across \ac{PE}s. -%On program exit, \openshmem can release all the resources associated to the library. -% It is up to the implementation on how to handle the finalization of the \openshmem library. -An \openshmem program finishes execution by returning from the main routine or when any PE calls \FUNC{shmem\_global\_exit}. When returning from main, \openshmem must complete all pending communication and release all the resources associated to the library using an implicit collective synchronization across PEs. The -user has the option to call \FUNC{shmem\_finalize} (before returning from main) to complete all pending communication and release all the \openshmem library resources without terminating the program. Calling any \openshmem routine after \FUNC{shmem\_finalize} leads to undefined behavior. - -The \ac{PE}{}s of the \openshmem{} program are identified by unique integers. 
The identifiers are integers assigned in a monotonically increasing manner from zero to the total number of \ac{PE}s minus 1. \ac{PE} identifiers are used for \openshmem{} calls (e.g. to specify \PUT{} or \GET{} routines on symmetric data objects, collective synchronization calls) or to dictate a control flow for \ac{PE}s using constructs of \Clang{} or \Fortran. The identifiers are fixed for the life cycle of the \openshmem{} program. -%on exit implementation are expected to release resources associated to the library - %following \Fortran{} or \Clang{} program execution. -%Each \ac{PE} can be implemented as an OS process or OS thread as long as the constraints imposed -%by the memory model and execution model are respected. -%The \ac{PE} in turn is mapped to either a processor core or a hardware thread. \rcomment{Manju: This sentence requires scrutiny}. -%Though all \ac{PE}s are required to execute the same program, each \ac{PE} is allowed to take -%a different control path. The \ac{PE}s progress independently, and can communicate and synchronize using the \openshmem{} interfaces. - -%The life cycle of \openshmem{} program starts with each \ac{PE} calling a global -%collective routine start\_pes{}, and ends with implementation dependent -%finalization. -%All \ac{PE}s in an \openshmem{} program should start by calling -%the initialization function start\_pes before using any of the other \openshmem{} -%library routines. A \ac{PE}{} calling start\_pes{} call more than once in the lifetime of program can result in an undefined behavior. The current specification does not define the finalization of \openshmem{}program. The implementations are allowed to provide their own interfaces for finalization as long as it is not required for the correct functioning of the \openshmem{} program. -%The \ac{PE}{}s of the \openshmem{} program are identified by unique integers. 
-%The identifiers are integers assigned in a monotonically increasing manner from zero to the total number of \ac{PE}s minus 1. \ac{PE} identifiers are used for other \openshmem{} library calls (such as collective synchronization) or to dictate a control flow for \ac{PE}s using constructs of \Clang{} or \Fortran. The identifiers are fixed for the life cycle of the \openshmem{} program. -%on exit implementation are expected to release resources associated to the library - -\subsection{Progress of \openshmem Operations} -\label{subsec:progress} -The \openshmem model assumes that computation and communication are naturally overlapped. \openshmem programs are expected to exhibit progression of communication both with and without \openshmem calls. Consider a \ac{PE} that is engaged in a computation with no \openshmem calls. Other \ac{PE}s should be able to communicate (\OPR{put}, \OPR{get}, \OPR{collective}, \OPR{atomic}, etc) and complete communication operations with that computationally-bound \ac{PE} without that \ac{PE} issuing any explicit \openshmem calls. \openshmem communication calls involving that \ac{PE} should progress regardless of when that \ac{PE} next engages in an \openshmem call. - -\textbf{Note to implementors:} - -\begin{itemize} -\item An \openshmem implementation for hardware that does not provide asynchronous communication capabilities may require a software progress thread in order to process remotely-issued communication requests without explicit program calls to the \openshmem library. \item High performance implementations of \openshmem are expected to leverage hardware offload capabilities and - provide asynchronous one-sided communication without software assistance. -\item Implementations should avoid deferring the execution of one-sided operations until a synchronization point where data is known to be available. High-quality implementations should attempt asynchronous delivery whenever possible, for performance reasons. 
Additionally, the \openshmem community discourages releasing \openshmem implementations that do not provide asynchronous one-sided operations, as these have very limited performance value for \openshmem programs. -\end{itemize} - -% -%ORIGINAL TEXT -%The \openshmem model assumes that computation and communication are -%naturally overlapped, and that all data transfers eventually complete. The OpenSHMEM execution model assumes that computation and communication are naturally overlapped. -%OpenSHMEM programs are expected to exhibit progression of communication both with and without OpenSHMEM calls. -% -%\textbf{Note to implementors:} while delivery can be deferred, for example until a synchronization point at which data is known to be available, high-quality implementations should attempt asynchronous delivery, whenever possible, for performance reasons. Progress can be ensured through the use of a dedicated progress thread in software, or through network hardware that offloads communication handling from processors, for example. - -%High quality \openshmem implementations must insure that programs exhibit %SP: Changing MUST to may as per discussion on 01/31/2014 -%progression of communication both with and without \openshmem calls. - -% A high quality \openshmem{} implementation may ensure that communication will -% progress without requiring \openshmem{} calls. - -%Consider a \ac{PE} that is engaged in a long computation with no \openshmem calls: other \ac{PE}s must be able to communicate (e.g. \PUT{}/\GET{}, -%collective, atomic operations) with that computationally-bound \ac{PE} without that \ac{PE} -%issuing any explicit \openshmem calls. \openshmem communication calls involving that \ac{PE} must progress -%regardless of when that \ac{PE} next engages in an \openshmem call. 
- -%\textbf{Note to implementers:} progress will often be ensured through -%the use of a dedicated progress thread in software, or through -%network hardware that offloads communication handling from processors, for example. -%SP: Why only communication ? Shouldn't t be for all openshmem calls? -% TC: separating out communication seemed to be the relevant comment here, as -% opposed to other offloads -%\subsection{Using the Symmetric \VAR{Work} and \VAR{pSync} Arrays} - -%Multiple \VAR{pSync} arrays are often needed if a particular \ac{PE} calls a \openshmem -%collective function twice without intervening barrier synchronization. -%Problems would occur if some \ac{PE}s in the \activeset{} for call 2 arrive at -%call 2 before processing of call 1 is complete by all \ac{PE}s in the call 1 -%\activeset. You can use \FUNC{shmem\_barrier} or \FUNC{shmem\_barrier\_all} to -%perform a barrier synchronization between consecutive calls to \openshmem -%collective functions. There are two special cases: -%\begin{itemize} -%\item The \FUNC{shmem\_barrier} function allows the same \VAR{pSync} array to be used -% on consecutive calls as long as the active \ac{PE} set does not change. -%\item If the same collective function is called multiple times with the -% same \activeset, the calls may alternate between two \VAR{pSync} arrays. -% The \openshmem functions guarantee that a first call is completely finished by -% all \ac{PE}s by the time processing of a third call begins on -% any \ac{PE}. -%\end{itemize} -%Because the \openshmem functions restore \VAR{pSync} to its original contents, -%multiple calls that use the same \VAR{pSync} array do not require that \VAR{pSync} -%be reinitialized after the first call. - -\subsection{Atomicity Guarantees} -\label{subsec:amo_guarantees} -\openshmem contains a number of routines that operate on symmetric data -atomically (Section \ref{sec:amo}). 
These routines guarantee that accesses by \openshmem's atomic operations will be exclusive, but do not guarantee exclusivity -in combination with other routines, either inside \openshmem or outside. - -For example: during the execution of an atomic remote integer increment -operation on a symmetric variable \VAR{X}, no other \openshmem atomic -operation may access \VAR{X}. After the increment, \VAR{X} will have -increased its value by \CONST{1} on the destination \ac{PE}, at which point other -atomic operations may then modify that \VAR{X}. -However, access to the symmetric object \VAR{X} with non-atomic operations, such as one-sided \PUT{} or \GET{} operations, will \OPR{invalidate} the atomicity guarantees. - -% %Memory model -% *Each OpenSHMEM PEs may have symmetric memory that is accessible by other PEs. -% *Symmetric memory is a region of memory where all the an instance of a data objects is replicated across PEs, have -% the same the same layout and relative offset. -% *All PEs can allocate a symmetric data objects using the symmetric heap, but they must do so as a collective operation. (is there a barrier after shmalloc?) -% *All writes to symmetric memory are relaxed (I'm not sure if this is the completion semantics) and are guaranteed to be visible to other PEs after a barrier_all, barrier(?), quiet, (what about wait? does it means iti sonly visible to me?) -% *Calls to barrier, barrier_all, quiet, wait, lock, atomics, are meant to guarantee memory consistency across PEs. -% *Read/Writes to symmetic data object may appear after startpe or after a the symmetric data object has been allocated in the symmetric heap (if it is a dynamic). -% *Operations like reduction, collect, etc guarantee memory consistency after completion(?) -% *Data races are possible in OpenSHMEM if multiple PEs write/read a symmetric data object from a single PE without proper synchronization. 
diff --git a/_deprecated_sources/INTRODUCTION/openshmem_history.tex b/_deprecated_sources/INTRODUCTION/openshmem_history.tex deleted file mode 100644 index 138a855ad..000000000 --- a/_deprecated_sources/INTRODUCTION/openshmem_history.tex +++ /dev/null @@ -1,93 +0,0 @@ -SHMEM has a long history as a parallel programming -model, having been used extensively on a number of products since -1993, including Cray T3D, Cray X1E, the Cray XT3/4, SGI Origin, SGI -Altix, clusters based on the Quadrics interconnect, and to a very -limited extent, Infiniband based clusters. - -\begin{itemize} -\item A SHMEM Timeline - \begin{itemize} - \item Cray SHMEM - \begin{itemize} - \item SHMEM first introduced by Cray Research Inc. in 1993 for Cray T3D - \item Cray is acquired by SGI in 1996 - \item Cray is acquired by Tera in 2000 (MTA) - \item Platforms: Cray T3D, T3E, C90, J90, SV1, SV2, X1, X2, XE, XMT, XT - \end{itemize} - \item SGI SHMEM - \begin{itemize} - \item SGI purchases Cray Research Inc. and SHMEM was integrated into - SGI's Message Passing Toolkit (MPT) - \item SGI currently owns the rights to SHMEM and \openshmem - \item Platforms: Origin, Altix 4700, Altix XE, ICE, UV - \item SGI was purchased by Rackable Systems in 2009 - \item SGI and Open Source Software Solutions, Inc. (OSSS) signed a - SHMEM trademark licensing agreement, in 2010 - \end{itemize} - \item Other Implementations - \begin{itemize} - \item Quadrics (Vega UK, Ltd.) 
- \item Hewlett Packard - \item GPSHMEM - \item IBM - \item QLogic - \item Mellanox - % \item University of Houston - \item University of Florida - \end{itemize} - \end{itemize} -\item OpenSHMEM Implementations - \begin{itemize} - \item SGI \openshmem - \item University of Houston - \openshmem Reference Implementation - \item Mellanox ScalableSHMEM - \item Portals-SHMEM - \item IBM OpenSHMEM - \end{itemize} -\end{itemize} - - -%Despite being supported by a variety of vendors there is no standard -%defining the SHMEM memory model or programming interface. Consistencies -%(where they exist) and extensions across the various implementations have -%been driven by the needs of an enthusiastic user community. The lack of a -%SHMEM standard has allowed each implementation to differ in both interface -%and semantics from vendor to vendor and even product line to product line, -%which has to this point limited broader acceptance. - -%\begin{description} -%\item [{{Cray~SHMEM~(MP-SHMEM,~LC-SHMEM):}}] Cray first introduced -%SHMEM in 1993 for its Cray T3D systems. Cray SHMEM was also used in -%other models: T3E, PVP and XT series. -%\item [{{SGI~SHMEM~(SGI-SHMEM):}}] Cray Research merged with Silicon -%Graphics (SGI) in February 1996. At this point SHMEM was incorporated -%into SGI's Message Passing Toolkit (MPT). The platforms supported -%were - SGI Irix, Origin and Altix. -%\item [{{Quadrics~SHMEM~(Q-SHMEM):}}] an optimized API for the Quadrics -%QsNet interconnect. It included SGI extensions and provided non-blocking -%puts and gets. A joint effort from HCS Lab \& Quadrics incorporated -%a program profiling interface called PSHMEM that can aid in the execution -%analysis of SHMEM programs. -%\end{description} -%The success of SHMEM's performance attracted several vendors to provide -%implementations (with varying names and features) for their systems. -%Some of them include: -%\begin{description} -%\item [{{HP~SHMEM:}}] Based on the Quadrics API. 
It is included in the -%UPC product kit. -%\item [{{Cyclops-64~SHMEM~(C64-SHMEM):}}] this SHMEM API supports the -%Cyclops-64 architecture. Most of the core features of Cray SHMEM are -%available with some additional interfaces specific to the Cyclops-64 -%architecture. -% -%\item [{{IBM~SHMEM:}}] An implementation created by IBM intended for -%internal use only. -%\item [{{TurboSHMEM:}}] This implementation uses IBM's Low-Level API -%(LAPI) technology to obtain optimized one-sided communication for -%the put/get operations. This allows applications written with the -%SHMEM API to run on IBM platforms with minimal source code changes. -%\item [{{GPSHMEM:}}] This implementation of SHMEM aims at providing full -%portability of applications. It is built mostly with Cray T3D components -%and functionalities and provides MPI and ARMCI support. This project -%is no longer maintained. -%\end{description} diff --git a/_deprecated_sources/INTRODUCTION/ordering_intro.tex b/_deprecated_sources/INTRODUCTION/ordering_intro.tex deleted file mode 100644 index ac8fc9f59..000000000 --- a/_deprecated_sources/INTRODUCTION/ordering_intro.tex +++ /dev/null @@ -1,2 +0,0 @@ -The following section discusses \openshmem \ac{API}s that provide mechanisms to ensure ordering and/or delivery of \OPR{Put}, \ac{AMO}, and memory store routines to symmetric data -objects. \ No newline at end of file diff --git a/_deprecated_sources/INTRODUCTION/original_language_bindings.tex b/_deprecated_sources/INTRODUCTION/original_language_bindings.tex deleted file mode 100644 index f2db6e94c..000000000 --- a/_deprecated_sources/INTRODUCTION/original_language_bindings.tex +++ /dev/null @@ -1,7 +0,0 @@ -\section{Language Bindings and Conformance} - -\openshmem is available with \Clang{} and \Fortran{} bindings. The \Cpp{} -interface is currently the same as that for \Clang. An \openshmem implementation can be conformant to one or both of the -interfaces. 
An implementation that provides e.g.\ only a \Clang{} interface may claim to conform to the \openshmem specification with respect to -the \Clang{} language, but not to \Fortran{} and should make this clear in its documentation. An implementation that provides both \Clang{} and \Fortran{} bindings may claim -complete conformance. diff --git a/_deprecated_sources/INTRODUCTION/osh_project.tex b/_deprecated_sources/INTRODUCTION/osh_project.tex deleted file mode 100644 index e4f822c2f..000000000 --- a/_deprecated_sources/INTRODUCTION/osh_project.tex +++ /dev/null @@ -1,8 +0,0 @@ -\section{The \openshmem Effort} - -\openshmem is a \ac{PGAS} library interface specification. \openshmem aims to provide a standard \ac{API} for SHMEM libraries to aid portability and facilitate uniform predictable results of \openshmem programs by explicitly stating the behavior and semantics of the \openshmem library calls. Through the different versions, \openshmem will continue to address the requirements of the \ac{PGAS} community. -As of this specification, existing vendors are moving towards \openshmem compliant implementations and new vendors are developing \openshmem library implementations to help the users write portable \openshmem code. This ensures that programs can run on multiple platforms without having to deal with subtle vendor-specific implementation differences. For more details on the history of -\openshmem please refer to \hyperref[sec:openshmem_history]{The History of \openshmem} section. - -%\footnote{The \openshmem specification is owned by Open Source Software Solutions LLC, a non-profit organization.} -The \openshmem\footnote{The \openshmem specification is owned by Open Source Software Solutions Inc., a non-profit organization, under an agreement with SGI.} effort is driven by the \ac{ESSC} at \ac{ORNL} and the University of Houston with significant input from the \openshmem{} community. 
Besides the specification, the effort also includes providing a reference \openshmem implementation, validation and verification suites, tools, a mailing list and website infrastructure to support specification activities. For more information please refer to: \url{http://www.openshmem.org/}. diff --git a/_deprecated_sources/INTRODUCTION/overview.tex b/_deprecated_sources/INTRODUCTION/overview.tex deleted file mode 100644 index 8c5de8282..000000000 --- a/_deprecated_sources/INTRODUCTION/overview.tex +++ /dev/null @@ -1,74 +0,0 @@ -\section{Overview} -\openshmem is a \ac{PGAS} library interface specification. In the \ac{PGAS} model each process has a local and -globally shared memory where portions of the shared memory may have affinity to a particular process. \openshmem -implements \ac{PGAS} using symmetric memory to share information among processes or \ac{PE}s. -It provides interfaces to perform communication and synchronization operations on both local and symmetric memory. -\openshmem is a library and unlike UPC, CAF, Titanium, X10 and Chapel, which are all -PGAS languages, it relies on the programmer to use the library calls correctly. - -The \openshmem specification is the result of an effort to standardize widely available SHMEM implementations. -The initial goal of the specification is to consolidate existing vendor SHMEM implementations, and pave the way for -a de facto standard for SHMEM with community involvement. This enables portability of SHMEM programs, and -allows vendors to build and optimize their hardware architecture for OpenSHMEM. - -The \openshmem specification defines routines, constants, variables, and language bindings for \Clang{} and \Fortran. -Some of the important \openshmem operations are as follows: - -\begin{enumerate} -\item \textbf{Data Transfers } - -\begin{enumerate} -\item One-sided puts : the initiator \ac{PE} (active side) specifies the local -data to be written to the target \ac{PE}'s (passive side) memory.
-\item One-sided gets : an explicit fetch operation is used to copy a variable -amount of data from a remote process and store it locally.\end{enumerate} -\begin{description} -\item [{{Note:}}] By avoiding the need for matching send and receive -calls, \openshmem{} simplifies the communication process by reducing the -number of calls required to have one \ac{PE} interact with other \ac{PE}s. -\end{description} -\item \textbf{Synchronization Mechanisms } - -\begin{enumerate} -\item Fence: Ensures ordering of PUT operations to a specific \ac{PE}. -\item Quiet: Ensures ordering of PUT operations to all \ac{PE}s. -\item Barrier: A collective synchronization routine in which no \ac{PE} may leave -the barrier prior to all \ac{PE}s entering the barrier. -\end{enumerate} -\item \textbf{Collective Communication} - -\begin{enumerate} -\item Broadcast: Copy a block of data from one \ac{PE} to one or more remote -\ac{PE}s. -\item Collection: Concatenate elements from the source array to a target -array over the specified \ac{PE}s. -\item Reduction: Perform an associative binary operation over the specified -\ac{PE}s. -\end{enumerate} -\item \textbf{Address Manipulation} - -\begin{enumerate} -\item Allocating and deallocating memory blocks in the symmetric space. -\end{enumerate} -\item \textbf{Locks} - -\begin{enumerate} -\item Implementation of mutual exclusion. -\end{enumerate} -\item \textbf{Atomic Memory Operations} - -\begin{enumerate} -\item Swap, Conditional Swap, Add and Increment -\end{enumerate} -\item \textbf{Data Cache control} - -\begin{enumerate} -\item Implementation of mechanisms to exploit the capabilities of hardware -cache if available. -\end{enumerate} -\end{enumerate} -\begin{description} -\item [{{Note:}}] More information about \openshmem routines can be found -in the Library Routines section.
-\end{description} - diff --git a/_deprecated_sources/INTRODUCTION/programming_model.tex b/_deprecated_sources/INTRODUCTION/programming_model.tex deleted file mode 100644 index ac0b4dad4..000000000 --- a/_deprecated_sources/INTRODUCTION/programming_model.tex +++ /dev/null @@ -1,107 +0,0 @@ -\section{Programming Model Overview} -%SP: Addressing suggestions from discussion on 01/28/2014 Merging the commented portions into the body. -%The \openshmem programming model consists of library routines that provide -%low-latency, high-bandwidth communication for use in highly parallelized -%scalable programs. The routines in the \openshmem \ac{API} provide a programming -%model for exchanging data between cooperating parallel processes. The resulting programs are similar -%in style to \ac{MPI} programs. The \openshmem \ac{API} can be used either alone -%or in combination with \ac{MPI} routines in the same parallel program. -%\openshmem implements a \ac{PGAS} model. -%In the \ac{PGAS} model, each process has a local and globally shared memory where portions of the shared memory may have affinity to a particular process. -\openshmem implements \ac{PGAS} by defining remotely accessible data objects as mechanisms to share information among \openshmem processes or \acp{PE} and private data objects that are accessible by the \ac{PE} itself. The \ac{API} allows communication and synchronization operations on both private (local to the PE initiating the operation) and remotely accessible data objects. The key feature of \openshmem is that data transfer operations are \textit{\textbf{one-sided}} in nature. This means that a local \ac{PE} executing a data transfer routine does not require the participation of the remote \ac{PE} to complete the routine. This allows for overlap between communication and computation to hide data transfer latencies, which makes \openshmem ideal for unstructured, small/medium size data communication patterns. 
The \openshmem library routines have the potential to provide a low-latency, high-bandwidth communication \ac{API} for use in highly parallelized scalable programs. -%\rcomment{Manju: To do - Make sure the first paragraph does not say the same -%things has first paragraph of Section 1} -%An \openshmem program is currently \ac{SPMD} in style. The -%\openshmem processes, called \ac{PE}s, all start at the -%same time, and they all run the same program. Usually the \ac{PE}s perform -%computation on their own subdomains of the larger problem, and periodically -%communicate with other \ac{PE}s to exchange information on which the -%next computation phase depends. -%SP: Addressing suggestions from discussion on 01/31/2014 - -%Data latency is the period of time that starts when a \ac{PE} initiates a transfer of data -%and ends when a \ac{PE} can use the data. %SP: What about put? Not guaranteed till synchronization is hit. - -%SP: Addressing suggestions from discussion on 01/28/2014 -%\openshmem routines support remote data transfer through \PUT operations, which transfer data to a -%different \ac{PE}, get operations, which transfer data from a different \ac{PE}, and remote pointers, which -%allow direct references to data objects owned by another \ac{PE}. Other operations supported are \OPR{collective} -%\OPR{broadcast} and \OPR{reduction}, \OPR{barrier synchronization}, and \OPR{atomic memory operations}. -%An atomic memory operation is an atomic read-and-update operation, such as a fetch-and-increment, on a remote -%or local data object. - -%\rcomment{Manju: [The idea is to talk SPMD. We are talking about nature of interfaces rather than -% the interfaces that enable SPMD. Replace the second paragraph with the one below ?]} -%\rcomment{\\ Oscar: I'm good with this change. minor change to say the SPMD can be used to decompose work\\} -The \openshmem{} interfaces can be used to implement \ac{SPMD} style programs. 
It provides interfaces to start the \openshmem{} \ac{PE}s in parallel, and communication and synchronization interfaces to access remotely accessible data objects across \ac{PE}s. These interfaces can be leveraged to divide a problem into multiple sub-problems that can be solved independently or with coordination using the communication and synchronization interfaces. The \openshmem specification defines library calls, constants, variables, and language bindings for \Clang{} and \Fortran{}. The \Cpp{} interface is currently the same as that for \Clang. Unlike UPC, Fortran 2008, Titanium, X10 and Chapel, which are all PGAS languages, \openshmem relies on -the user to use the library calls to implement the correct semantics of its programming model. - -An overview of the \openshmem routines is given below: - -\begin{enumerate} -\item \textbf{Library Setup and Query} - -\begin{enumerate} -\item \OPR{Initialization}: The \openshmem library environment is initialized. -\item \OPR{Query}: The local \ac{PE} may get the number of \acp{PE} running the same program and its unique integer identifier. -\item \OPR{Accessibility}: The local \ac{PE} can find out if a remote \ac{PE} is executing the same binary, or if a particular symmetric data object can be accessed by a remote \ac{PE}, or may obtain a pointer to a symmetric data object on the specified remote \ac{PE} on shared memory systems. -\end{enumerate} - -\item \textbf{Symmetric Data Object Management} -\begin{enumerate} -\item \OPR{Allocation}: All executing \ac{PE}s must participate in the allocation of a symmetric data object with identical arguments. -\item \OPR{Deallocation}: All executing \ac{PE}s must participate in the deallocation of the same symmetric data object with identical arguments. -\item \OPR{Reallocation}: All executing \ac{PE}s must participate in the reallocation of the same symmetric data object with identical arguments.
-\end{enumerate} - -\item \textbf{Remote Memory Access} - -\begin{enumerate} -\item \PUT: The local \ac{PE} specifies the \source{} -data object (private or symmetric) that is copied to the symmetric data object on the remote \ac{PE}. -\item \GET: The local \ac{PE} specifies the symmetric data object on the remote \ac{PE} -that is copied to a data object (private or symmetric) on the local \ac{PE}. -\end{enumerate} - -\item \textbf{Atomics} -\begin{enumerate} -\item \OPR{Swap}: The \ac{PE} initiating the swap gets the old value of a symmetric data object from a remote \ac{PE} and copies a new value to that symmetric data object on the remote \ac{PE}. -\item \OPR{Increment}: The \ac{PE} initiating the increment adds 1 to the symmetric data object on the remote \ac{PE}. -\item \OPR{Add}: The \ac{PE} initiating the add specifies the value to be added to the symmetric data object on the remote \ac{PE}. -\item \OPR{Compare and Swap}: The \ac{PE} initiating the swap gets the old value of the symmetric data object based on a value to be compared and copies a new value to the symmetric data object on the remote \ac{PE}. -\item \OPR{Fetch and Increment}: The \ac{PE} initiating the increment adds 1 to the symmetric data object on the remote \ac{PE} and returns with the old value. -\item \OPR{Fetch and Add}: The \ac{PE} initiating the add specifies the value to be added to the symmetric data object on the remote \ac{PE} and returns with the old value. -\end{enumerate} - -\item \textbf{Synchronization and Ordering} -\begin{enumerate} -\item \OPR{Fence}: The \ac{PE} calling fence ensures ordering of remote access operations and stores to symmetric data objects with respect to a specific destination \ac{PE}. -\item \OPR{Quiet}: The \ac{PE} calling quiet ensures completion of remote access operations and stores to symmetric data objects. 
-\item \OPR{Barrier}: All or some \ac{PE}s collectively synchronize and ensure completion of all remote and local updates prior to any \ac{PE} returning from the call. -\end{enumerate} - -\item \textbf{Collective Communication} -\begin{enumerate} -\item \OPR{Broadcast}: The \textit{root} \ac{PE} specifies a symmetric data object to be copied to a symmetric data object on one or more remote \acp{PE} (not including itself). -\item \OPR{Collection}: All \acp{PE} participating in the routine get the result of concatenated symmetric objects contributed by each of the \acp{PE} in another symmetric data object. -\item \OPR{Reduction}: All \acp{PE} participating in the routine get the result of an associative binary routine over elements of the specified symmetric data object on another symmetric data object. -\end{enumerate} - -\item \textbf{Mutual Exclusion} -\begin{enumerate} -\item \OPR{Set Lock}: The \ac{PE} acquires exclusive access to the region bounded by the symmetric \textit{lock} variable. -\item \OPR{Test Lock}: The \ac{PE} tests the symmetric \textit{lock} variable for availability. -\item \OPR{Clear Lock}: The \ac{PE} which has previously acquired the \textit{lock} releases it. -\end{enumerate} - -\item \textbf{Data Cache Control \textit{(deprecated on cache-coherent systems)}} -\begin{enumerate} -\item Implementation of mechanisms to exploit the capabilities of hardware -cache if available. -\end{enumerate} -\end{enumerate} - -%\begin{description} -%\item [{{Note:}}] More information about \openshmem routines can be found -%in the Library Routines section.
-%\end{description} diff --git a/_deprecated_sources/INTRODUCTION/rma_intro.tex b/_deprecated_sources/INTRODUCTION/rma_intro.tex deleted file mode 100644 index 6b5866a33..000000000 --- a/_deprecated_sources/INTRODUCTION/rma_intro.tex +++ /dev/null @@ -1,30 +0,0 @@ -\subsection{Remote Memory Access Routines} -\label{sec:rma} -The \ac{RMA} routines described in this section are one-sided communication -mechanisms of the \openshmem{} \ac{API}. While using these mechanisms, the user is required to provide parameters only on the calling side. A characteristic of one-sided communication is that it decouples communication from -the synchronization. One-sided communication mechanisms transfer -the data but do not synchronize the sender of the data with the receiver of the data. -%Oscar: I commented this because this sentence is useful but for background, but is out of scope for the spec API overview -%However, two-sided communication mechanisms such as Send/Recv have two effects: -%communication of data, and synchronization of the sender with the receiver. - -\openshmem{} \ac{RMA} routines are all performed on the symmetric objects. -The initiator \ac{PE} of the call is designated as \source{}, and the -\ac{PE} in which memory is accessed is designated as \dest{}. In the case of the remote -update routine, \PUT{}, the origin is the \source{} \ac{PE} and the -destination \ac{PE} is the \dest{} PE. In the case of the remote read routine, \GET{}, -the origin is the \dest{} \ac{PE} and the destination is the \source{} \ac{PE}. - -\openshmem{} provides three different types of one-sided communication interfaces. -\FUNC{shmem\_put$<$bits$>$} interface transfers data in chunks -of bits. \FUNC{shmem\_put32}, for example, copies data to a \dest{} \ac{PE} in chunks of 32 bits. \FUNC{shmem\_$<$datatype$>$\_put} interface copies elements of type -\textit{datatype} from a \source{} \ac{PE} to a \dest{} \ac{PE}. 
-For example, \FUNC{shmem\_integer\_put} copies elements -of type integer from a \source{} \ac{PE} to a \dest{} \ac{PE}. -\FUNC{shmem\_$<$datatype$>$\_p} interface is similar to \FUNC{shmem\_$<$datatype$>$\_put} except that it only transfers one element of type \VAR{datatype}. - -\openshmem{} provides interfaces for transferring both contiguous and -non-contiguous data. The non-contiguous data transfer interfaces are prefixed -with ``\VAR{i}". \FUNC{shmem\_$<$datatype$>$\_iput} interface, for example, copies strided data elements from the \source{} \ac{PE} to a \dest{} \ac{PE}. - - diff --git a/_deprecated_sources/INTRODUCTION/setup_query_intro.tex b/_deprecated_sources/INTRODUCTION/setup_query_intro.tex deleted file mode 100644 index e961fe7cb..000000000 --- a/_deprecated_sources/INTRODUCTION/setup_query_intro.tex +++ /dev/null @@ -1 +0,0 @@ -The library setup and query interfaces that initialize and monitor the parallel environment of the \ac{PE}s. \ No newline at end of file diff --git a/_deprecated_sources/INTRODUCTION/synchronization_intro.tex b/_deprecated_sources/INTRODUCTION/synchronization_intro.tex deleted file mode 100644 index c8923227f..000000000 --- a/_deprecated_sources/INTRODUCTION/synchronization_intro.tex +++ /dev/null @@ -1 +0,0 @@ -The following section discusses \openshmem \ac{API}s that provide a mechanism for synchronization between two \ac{PE}s based on the value of a symmetric data object. diff --git a/_deprecated_sources/INTRODUCTION/synchronization_model.tex b/_deprecated_sources/INTRODUCTION/synchronization_model.tex deleted file mode 100644 index eb2dcf2cb..000000000 --- a/_deprecated_sources/INTRODUCTION/synchronization_model.tex +++ /dev/null @@ -1,176 +0,0 @@ -\subsubsection{Synchronization and Communication Ordering in \openshmem} - -%In the presence of the \openshmem's one-sided communication, synchronization and ordering become critical.
- -When using the \openshmem \ac{API}, synchronization, ordering, and completion of communication become critical. The updates via \PUT{} routines, \acp{AMO} and store routines on symmetric data cannot be guaranteed until some form of synchronization or ordering is introduced by the program user. The table below gives the different synchronization and ordering choices, and the situations where they may be useful.\\ - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -\hline -\textbf{\openshmem \ac{API}} & \centering \textbf{Working of \openshmem \ac{API}} \tabularnewline -\hline -\hline -{Point-to-point synchronization}\\ -\FUNC{shmem\_wait}, \FUNC{shmem\_wait\_until} -& -\raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{diagrams/updated/wait}} -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -{} -& -{ Waits for a symmetric variable to be updated by a remote \ac{PE}. Should be used when computation on the local \ac{PE} cannot proceed without the value that the remote \ac{PE} is to update.} \tabularnewline -\hline -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} - -{Ordering puts issued by a local \ac{PE}} \\ -\FUNC{shmem\_fence} -& -\raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{diagrams/updated/fence}} -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -{} -& -All \PUT{} routines, \acp{AMO} and store routines on symmetric data issued to same \ac{PE} are guaranteed to be delivered before Puts (to the same \ac{PE}) issued after the \FUNC{fence} call. \tabularnewline -%Fence guaranteed order of puts before and after before \Put{}s -%before the fence operation by the local \ac{PE} are guaranteed to be completed and visible before puts issued after the fence call. 
-% This operation should be used when all remote writes by a local \ac{PE} to a specific remote \ac{PE} need to be visible %(\rcomment{Swaroop: assuming visible == delivered}) -%before any new remote write operation to the same \ac{PE}. \tabularnewline -\hline -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -\hline -\textbf{\openshmem \ac{API}} & \centering \textbf{Working of \openshmem \ac{API}} \tabularnewline -\hline -\hline -{Ordering puts issued by all \ac{PE} }\\ -\FUNC{shmem\_quiet} -& -\raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{diagrams/updated/quiet}} -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -{} -& -{All \PUT{} routines, \acp{AMO} and store routines on symmetric data issued by a local \ac{PE} to all remote \ac{PE}s are guaranteed to be completed and visible once quiet returns. This routine should be used when all remote writes issued by a local \ac{PE} need to be visible to all other \ac{PE}s before the local \ac{PE} proceeds. } \tabularnewline -\hline -\end{tabular} - - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -Collective synchronization over an \activeset \\ -\FUNC{shmem\_barrier} -& -\raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{diagrams/updated/barrier}} -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -{} -& -{All local and remote memory operations issued by all \ac{PE}s within the \activeset{} are guaranteed to be completed before any \ac{PE} in the \activeset{} returns from the call. Additionally, no \ac{PE} may return from the barrier until all \ac{PE}s in the \activeset{} have entered the same barrier call. This routine should be used when synchronization as well as completion of all stores and remote memory updates via \openshmem is required over a subset of the executing \ac{PE}s.} \tabularnewline %Figure (\ref{fig:barrier}).
-\hline -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -\hline -\textbf{\openshmem \ac{API}} & \centering \textbf{Working of \openshmem \ac{API}} \tabularnewline -\hline -\hline -{Collective synchronization over all \ac{PE}s} \\ - \FUNC{shmem\_barrier\_all} -& -\raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{diagrams/updated/barrierall}} -\end{tabular} - -\begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -{} -& -{All local and remote memory operations issued by all \ac{PE}s are guaranteed to be completed before any \ac{PE} returns from the call. Additionally no \ac{PE} shall return from the barrier until all \ac{PE}s have entered the same \FUNC{shmem\_barrier\_all} call. This routine should be used when synchronization as well as completion of all stores and remote memory updates via \openshmem is required over all \ac{PE}s. } \tabularnewline -\hline -\end{tabular} -\clearpage -%%%%%%%%%%%%%%%%%OLD LAYOUT%%%%%%%%%%%%%%%% -%\begin{tabular}{|p{0.2\textwidth}|p{0.4\textwidth}|p{0.3\textwidth}|} -%\hline -%\textbf{\openshmem \ac{API}} & \centering \textbf{Working of \openshmem \ac{API}} & \textbf{Appropriate Situation}\tabularnewline -%\hline -%\hline -%{Point-to-point synchronization}\\ -%\FUNC{shmem\_wait}, \FUNC{shmem\_wait\_until} -%& -%\raisebox{-\totalheight}{\includegraphics[width=0.39\textwidth]{diagrams/updated/wait}} -%{Waits for a symmetric variable to be updated by a remote \ac{PE}. Should be used when computation at the local \ac{PE} cannot proceed without the value that the remote \ac{PE} is to update.} \tabularnewline -%% Figure (\ref{fig:wait}).}\tabularnewline -%\hline -%Ordering puts issued by a local \ac{PE} \\ -%\FUNC{shmem\_fence} -%& -%\raisebox{-\totalheight}{\includegraphics[width=0.39\textwidth]{diagrams/updated/fence}} -%& -%All puts issued before the fence operation by the local \ac{PE} are guaranteed to be delivered before puts issued after the fence call to the same remote \ac{PE}. 
This operation should be used when all remote writes by a local \ac{PE} to a remote \ac{PE} need to be visible %(\rcomment{Swaroop: assuming visible == delivered}) -%before any new remote write operation to the same \ac{PE}. \tabularnewline -%%Figure (\ref{fig:fence}).\tabularnewline -%\hline -%Ordering puts issued by all \ac{PE} \\ -%\FUNC{shmem\_quiet} -%& -%\raisebox{-\totalheight}{\includegraphics[width=0.39\textwidth]{diagrams/updated/quiet}} -%& -%{All puts issued by all \ac{PE}s are guaranteed to be delivered before the next local update or remote memory update via \openshmem (\rcomment{May change after SGI's input.}). This operation should be used when all remote writes by all \ac{PE}s need to be visible to all other \ac{PE}s before any new local or remote memory update via \openshmem library operation. } \tabularnewline -%%Figure (\ref{fig:quiet}).} \tabularnewline -%\hline -%\end{tabular} -%\clearpage -%\begin{tabular}{|p{0.2\textwidth}|p{0.4\textwidth}|p{0.3\textwidth}|} -%\hline -%Collective synchronization over an \activeset \\ -%\FUNC{shmem\_barrier} -%& -%\raisebox{-\totalheight}{\includegraphics[width=0.39\textwidth]{diagrams/updated/barrier}} -%& -%{All local and remote memory operations issued by all \ac{PE}s within the \activeset{} are guaranteed to be completed before any \ac{PE} in the \activeset{} returns from the call. Additionally no \ac{PE} my return from the barrier till all \ac{PE}s in the \activeset{} have called the same barrier call. This operation should be used when synchronization as well as completion of local stores and remote memory updates via \openshmem is required over a sub-set of the executing \ac{PE}s.} \tabularnewline %Figure (\ref{fig:barrier}). 
-%\hline -%Collective synchronization over all \ac{PE}s \\ -% \FUNC{shmem\_barrier\_all} -%& -%\raisebox{-\totalheight}{\includegraphics[width=0.39\textwidth]{diagrams/updated/barrierall}} -%& -%{All local and remote memory operations issued by all \ac{PE}s are guaranteed to be completed before any \ac{PE} returns from the call. Additionally no \ac{PE} my return from the barrier until all \ac{PE}s have called the same barrier call. This operation should be used when synchronization as well as completion of local stores and remote memory updates via \openshmem is required over all \ac{PE}s. } \tabularnewline%Figure (\ref{fig:barrierall}). -% -%\hline -%\end{tabular} - - -%\begin{figure} -%% \centering -% \begin{subfigure}{0.5\textwidth} -% \includegraphics[width=\textwidth]{diagrams/updated/wait} -% \caption{\FUNC{shmem\_wait}} -% \label{fig:wait} -% \end{subfigure} -% \begin{subfigure}{0.49\textwidth} -% \includegraphics[width=\textwidth]{diagrams/updated/fence} -% \caption{\FUNC{shmem\_fence}} -% \label{fig:fence} -% \end{subfigure} -% \begin{subfigure}{0.48\textwidth} -% \includegraphics[width=\textwidth]{diagrams/updated/quiet} -% \caption{\FUNC{shmem\_quiet}} -% \label{fig:quiet} -% \end{subfigure} -% \begin{subfigure}{0.48\textwidth} -% \includegraphics[width=\textwidth]{diagrams/updated/barrier} -% \caption{\FUNC{shmem\_barrier}} -% \label{fig:barrier} -% \end{subfigure} -% \centering -% \begin{subfigure}{0.48\textwidth} -% \includegraphics[width=\textwidth]{diagrams/updated/barrierall} -% \caption{\FUNC{shmem\_barrierall}} -% \label{fig:barrierall} -% \end{subfigure} -% \caption{\openshmem{} synchronization operations}\label{fig:animals} -%\end{figure} diff --git a/_deprecated_sources/INTRODUCTION/undefined_behavior.tex b/_deprecated_sources/INTRODUCTION/undefined_behavior.tex deleted file mode 100644 index f199d4782..000000000 --- a/_deprecated_sources/INTRODUCTION/undefined_behavior.tex +++ /dev/null @@ -1,55 +0,0 @@ - -The specification provides 
guidelines to the expected behavior of -various library routines. In cases where routines are improperly used -or the input is not in accordance with the specification, undefined -behavior may be observed. Depending on the implementation there are -many interpretations of undefined behavior. - -$\;$ - -$ $% -\begin{tabular}{|>{\raggedright}p{0.3\textwidth}|>{\raggedright}p{0.6\textwidth}|} -\hline -\textbf{Inappropriate Usage} & \textbf{Undefined Behavior}\tabularnewline -\hline -\hline -Uninitialized library & If \openshmem is not initialized through a call to -\FUNC{shmem\_init}, subsequent accesses to \openshmem routines have undefined -results. An implementation may choose, for example, to try to continue or abort -immediately upon the first call to an uninitialized routine.\tabularnewline -\hline -Accessing non-existent \ac{PE}s & If a communications routine accesses a -non-existent \ac{PE}, then the \openshmem library can choose to handle this -situation in an implementation-defined way. For example, the library may issue -an error message saying that the \ac{PE} accessed is outside the range of -accessible \ac{PE}s, or may exit without a warning.\tabularnewline -\hline -Use of non-symmetric variables & Some routines require remotely accessible -variables to perform their function. A \PUT{} to a non-symmetric variable can -be trapped where possible and the library can abort the program. Another -implementation may choose to continue either with a warning or -silently.\tabularnewline -\hline -Non-symmetric variables & The symmetric memory management routines are -collectives, which means that all \ac{PE}s in the program must issue the same -\FUNC{shmem\_malloc} call with the same size request. 
Program behavior after a -mismatched \FUNC{shmem\_malloc} call is undefined.\tabularnewline -\hline -Use of NULL pointers with non-zero \VAR{len} specified & In any \openshmem routine -that takes a pointer and \VAR{len} describing the number of elements in that -pointer, NULL may not be specified for the pointer unless the corresponding \VAR{len} is also -specified as zero. Otherwise, the resulting behavior is undefined. -The following cases summarize this behavior: -\begin{itemize} - \item \VAR{len} is 0, pointer is NULL: supported. - \item \VAR{len} is not 0, pointer is NULL: undefined behavior. - \item \VAR{len} is 0, pointer is not NULL: supported. - \item \VAR{len} is not 0, pointer is not NULL: supported. -\end{itemize} -\tabularnewline -\hline -Multiple calls to \FUNC{shmem\_init} & In an OpenSHMEM program where -\FUNC{shmem\_init} has already been called, any subsequent calls to -\FUNC{shmem\_init} result in undefined behavior.\tabularnewline -\hline -\end{tabular} diff --git a/_deprecated_sources/INTRODUCTION/writing_programs.tex b/_deprecated_sources/INTRODUCTION/writing_programs.tex deleted file mode 100644 index dace61721..000000000 --- a/_deprecated_sources/INTRODUCTION/writing_programs.tex +++ /dev/null @@ -1,44 +0,0 @@ -\section*{Incorporating \openshmem{} into Programs} - -In this section, we describe how to write a ``Hello World" \openshmem program. -To write a ``Hello World" \openshmem program we need to: - -\begin{itemize} -\item Add the include file shmem.h (for \Clang) or shmem.fh (for \Fortran). -\item Add the initialization call \FUNC{shmem\_init} (line 9). -\item Use OpenSHMEM calls to query the total number of PEs (line 10) and PE id (line 11). -\item There is no explicit finalize call; either a return from - \texttt{main()} (line 13) or an explicit \texttt{exit()} acts as an - implicit \openshmem finalization.
-\item In \openshmem the order in which lines appear - in the output is not fixed as \ac{PE}s execute asynchronously in parallel. -\end{itemize} - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=openshmem-hello,language=OSH2+C}{EXAMPLES/hello-openshmem.c} -\outputlisting{language=bash,caption={Expected Output (4 - processors)}}{EXAMPLES/hello-openshmem-c.output} -\vspace{0.1in} -\end{minipage} - -\openshmem also has a \Fortran{} API, so for completeness we will now give the -same program written in \Fortran, in listing~\ref{openshmem-hello-f90}: - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=openshmem-hello-f90,language=OSH2+F}{EXAMPLES/hello-openshmem.f90} -\outputlisting{language=bash,caption={Expected Output (4 - processors)}}{EXAMPLES/hello-openshmem-f90.output} -\vspace{0.1in} -\end{minipage} - -The following example shows a more complex \openshmem program that illustrates the use of symmetric data objects. Note the declaration of the \VAR{static short dest} array and its use as the remote destination in \openshmem short \PUT. The use of the \VAR{static} keyword results in the \VAR{dest} array being symmetric on \ac{PE} \CONST{0} and \ac{PE} \CONST{1}. Each \ac{PE} is able to transfer data to the \dest{} array by simply specifying the local address of the symmetric data object which is to receive the data. This aids programmability, as the address of the \dest{} need not be exchanged with the active side (\ac{PE} \CONST{0}) prior to the RMA (Remote Memory Access) routine. Conversely, the declaration of the \VAR{short source} array is asymmetric. Because the \PUT{} handles the references to the \VAR{source} array only on the active (local) side, the asymmetric \source{} object is handled correctly. 
- -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=openshmem-hello,language=OSH2+C}{EXAMPLES/writing_shmem_example.c} -\outputlisting{language=bash,caption={Expected Output (4 - processors)}}{EXAMPLES/writing_shmem_example.output} -\vspace{0.1in} -\end{minipage} diff --git a/_deprecated_sources/MACROS b/_deprecated_sources/MACROS deleted file mode 100644 index 46445be70..000000000 --- a/_deprecated_sources/MACROS +++ /dev/null @@ -1,4 +0,0 @@ -Function: \FUNC{} -Variables: \VAR{} -Constants: \CONST{} - diff --git a/_deprecated_sources/Makefile b/_deprecated_sources/Makefile deleted file mode 100644 index 89c868c6f..000000000 --- a/_deprecated_sources/Makefile +++ /dev/null @@ -1,38 +0,0 @@ -TARGET=main_spec - -# make pdf by default -all: ${TARGET}.pdf - -# it doesn't really need the .dvi, but this way all the refs are right -%.pdf : %.dvi - pdflatex $* - -${TARGET}.bbl: ${TARGET}.bib -# in case we don't already have a .aux file listing citations -# this should probably be a separate makefile target/dependency instead -# of doing it every time... 
but *shrug* - pdflatex ${TARGET}.tex -# get the citations out of the bibliography - bibtex ${TARGET} -# do it again in case there are out-of-order cross-references - @pdflatex ${TARGET}.tex - -#${TARGET}.dvi: ${TARGET}.bbl ${TARGET}.tex -${TARGET}.dvi: ${TARGET}.tex - @pdflatex ${TARGET}.tex - -# shortcut, so we can say "make ps" -ps: ${TARGET}.ps - -${TARGET}.ps: ${TARGET}.dvi - @dvips -t a4 ${TARGET}.dvi - -clean: - rm -f ${TARGET}.{log,aux,ps,dvi,bbl,blg,log} - -veryclean: clean - rm -f ${TARGET}.{ps,pdf,out} - - -PHONY : ps all clean reallyclean - diff --git a/_deprecated_sources/OSH_Spec1.1.xlsx b/_deprecated_sources/OSH_Spec1.1.xlsx deleted file mode 100644 index 78466fe87..000000000 Binary files a/_deprecated_sources/OSH_Spec1.1.xlsx and /dev/null differ diff --git a/_deprecated_sources/OpenSHMEM_Pound.png b/_deprecated_sources/OpenSHMEM_Pound.png deleted file mode 100644 index 438e33d90..000000000 Binary files a/_deprecated_sources/OpenSHMEM_Pound.png and /dev/null differ diff --git a/_deprecated_sources/README b/_deprecated_sources/README deleted file mode 100644 index d505ff498..000000000 --- a/_deprecated_sources/README +++ /dev/null @@ -1 +0,0 @@ -Oscar has already created shmem_put.tex in TEX_FILES with the appropriate macros. diff --git a/_deprecated_sources/TEX_FILES/NEW_shmem_my_pe.tex b/_deprecated_sources/TEX_FILES/NEW_shmem_my_pe.tex deleted file mode 100644 index e5f613b15..000000000 --- a/_deprecated_sources/TEX_FILES/NEW_shmem_my_pe.tex +++ /dev/null @@ -1,45 +0,0 @@ -\bAPI{SHMEM\_MY\_PE}{Returns the number of the calling \ac{PE}.} -%Synopsis C -\synC -int shmem_my_pe(void); %*\synCE - -%Synopsis F -\synF -INTEGER SHMEM_MY_PE, ME -ME = SHMEM_MY_PE()%*\synFE - -%DESCRIPTION - -%Arguments -\desB{ - \argRow{None}{}{} -} -%API Description -{ - This routine returns the \ac{PE} number of the calling \ac{PE}. It accepts no arguments. 
The result is an integer between \CONST{0} and \VAR{npes} - \CONST{1}, where \VAR{npes} is the total number of \ac{PE}s executing the current program. -} -%API Description Table. -{ -% \desTB{ } -% { -% \cRow{}{} -% } - %Return Value - \desR{Integer - Between \CONST{0} and \VAR{npes} - \CONST{1}} - %NOTES - \notesB{Each \ac{PE} has a unique number or identifier. As of \openshmem - Specification 1.2 the use of name \FUNC{\_my\_pe} has been deprecated. - Although \openshmem libraries are required to support the call, users - are encouraged to use \FUNC{shmem\_my\_pe} instead. The behavior and - signature of the routine \FUNC{shmem\_my\_pe} remains unchanged from the - deprecated \FUNC{\_my\_pe} version. -} -} -%EXAMPLES -\exampleB{ - \exampleITEM - {The following \FUNC{shmem\_my\_pe} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_mype_example.c} - {} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/NEW_shmem_num_pe.tex b/_deprecated_sources/TEX_FILES/NEW_shmem_num_pe.tex deleted file mode 100644 index 878f8f114..000000000 --- a/_deprecated_sources/TEX_FILES/NEW_shmem_num_pe.tex +++ /dev/null @@ -1,46 +0,0 @@ -\bAPI{SHMEM\_N\_PES}{Returns the number of \ac{PE}s running in a program.} -%Synopsis C -\synC -int shmem_n_pes(void); %*\synCE - -%Synopsis F -\synF -INTEGER SHMEM_N_PES, N_PES -N_PES = SHMEM_N_PES()%*\synFE - -%DESCRIPTION - -%Arguments -\desB{ - \argRow{None}{}{} -} -%API Description -{ - The routine returns the number of \ac{PE}s running the program. -} -%API Description Table. -{ -% \desTB{} -% { -% \cRow{}{} -% } - %Return Value - \desR{Integer - Number of \ac{PE}s running the \openshmem program.} - %NOTES - \notesB{As of \openshmem Specification 1.2 the use of \FUNC{\_num\_pes} - has been deprecated. Although \openshmem libraries are required to - support the call, users are encouraged to use \FUNC{shmem\_n\_pes} - instead. 
The behavior and signature of the routine - \FUNC{shmem\_n\_pes} remains unchanged from the deprecated - \FUNC{\_num\_pes} version. -} - } - %EXAMPLES -\exampleB{ - \exampleITEM - {The following \FUNC{shmem\_n\_pes} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_npes_example.c} - {} -} -\eAPI - diff --git a/_deprecated_sources/TEX_FILES/shfree.tex b/_deprecated_sources/TEX_FILES/shfree.tex deleted file mode 100644 index 82ff3757c..000000000 --- a/_deprecated_sources/TEX_FILES/shfree.tex +++ /dev/null @@ -1,97 +0,0 @@ -\bAPI{SHMEM\_MALLOC, SHMEM\_FREE, SHMEM\_REALLOC, SHMEM\_ALIGN}{Symmetric heap memory management routines.} -%SYNOPSIS -\synC -void *shmem_malloc(size_t size); -void shmem_free(void *ptr); -void *shmem_realloc(void *ptr, size_t size); -void *shmem_align(size_t alignment, size_t size);%*\synCE - -%DESCRIPTION -%Arguments -\desB{ - \argRow{IN}{size}{The size, in bytes, of a block to be allocated from the symmetric heap. This argument is of type \VAR{size\_t}} - \argRow{IN}{ptr}{Points to a block within the symmetric heap.} - \argRow{IN}{alignment}{Byte alignment of the block allocated from the symmetric heap.} -} -%API Description -{ - The \FUNC{shmem\_malloc} routine returns a pointer to a block of at least \VAR{size} - bytes suitably aligned for any use. This space is allocated from the - symmetric heap (in contrast to \FUNC{malloc}, which allocates from the - private heap). - - The \FUNC{shmem\_align} routine allocates a block in the symmetric heap that - has a byte alignment specified by the alignment argument. - - The \FUNC{shmem\_free} routine causes the block to which \VAR{ptr} points to be - deallocated, that is, made available for further allocation. If \VAR{ptr} is - a null pointer, no action occurs. - - The \FUNC{shmem\_realloc} routine changes the size of the block to which \VAR{ptr} - points to the size (in bytes) specified by \VAR{size}. The contents of the - block are unchanged up to the lesser of the new and old sizes. 
If the - new size is larger, the value of the newly allocated portion of the - block is indeterminate. - If \VAR{ptr} is a \CONST{NULL} pointer, the \FUNC{shmem\_realloc} routine behaves like the \FUNC{shmem\_malloc} routine for the specified size. If \VAR{size} is \CONST{0} and \VAR{ptr} is not a \CONST{NULL} pointer, the block to which it points is freed. If the space cannot be allocated, the block to which \VAR{ptr} points is unchanged. - - The \FUNC{shmem\_malloc}, \FUNC{shmem\_free}, and \FUNC{shmem\_realloc} routines are provided so that multiple \ac{PE}s in a program can allocate symmetric, remotely - accessible memory blocks. These memory blocks can then be used with - \openshmem communication routines. Each of these routines calls the - \FUNC{shmem\_barrier\_all} routine before returning; this ensures that all - \ac{PE}s participate in the memory allocation, and that the memory on other - \ac{PE}s can be used as soon as the local \ac{PE} returns. The user is - responsible for calling these routines with identical argument(s) on - all \ac{PE}s; if differing \VAR{size} arguments are used, the behavior of the call and any subsequent \openshmem calls becomes undefined. - %subsequent calls may not - %return the same symmetric heap address on all \ac{PE}s. -} -%API Description Table -{ - %Return Value - \desR{ - The \FUNC{shmem\_malloc} routine returns a pointer to the allocated space; otherwise, it returns a \CONST{NULL} pointer. - - The \FUNC{shmem\_free} routine returns no value. - - The \FUNC{shmem\_realloc} routine returns a pointer to the allocated space (which may have moved); otherwise, it returns a null pointer. - - The \FUNC{shmem\_align} routine returns an aligned pointer to the allocated space; otherwise, it returns a \CONST{NULL} pointer. - } - %NOTES - \notesB{ As of Specification 1.2 the use of \FUNC{shmalloc}, - \FUNC{shmemalign}, \FUNC{shfree}, and \FUNC{shrealloc} has been - deprecated.
Although OpenSHMEM libraries are required to support - the calls, program users are encouraged to use - \FUNC{shmem\_malloc}, \FUNC{shmem\_align}, \FUNC{shmem\_free}, - and \FUNC{shmem\_realloc} instead. The behavior and signature - of the routines remains unchanged from the deprecated versions. - -The total size of the symmetric heap is determined at job startup. One can adjust the size of the heap using the \CONST{SMA\_SYMMETRIC\_SIZE} environment variable (where available). - -The \FUNC{shmem\_malloc}, \FUNC{shmem\_free}, and \FUNC{shmem\_realloc} routines differ from the private heap allocation routines in that all \ac{PE}s in a program must call them (a barrier is used to ensure this). - } - \notesImp{ - The symmetric heap allocation functions always return a pointer to - corresponding symmetric objects across all PEs. The \openshmem{} - specification does not require that the virtual addresses are equal across - all \acp{PE}. Nevertheless, the implementation must avoid costly address - translation operations in the communication path, including order $N$ (where - $N$ is the number of \acp{PE}) memory translation tables. - In order to avoid address translations, the implementation may - re-map the allocated block of memory based on agreed virtual address. - Additionally, some operating systems provide an option to disable - virtual address randomization, which enables predictable allocation - of virtual memory addresses. - } -} - -%EXAMPLES -%\rcomment{Tommy: Code will not compile without example, need example for shfree. 
\\} -% -%\exampleB{ -% \exampleITEM -% {The following shmalloc example is for C/C++ programs:} -% {./EXAMPLES/shmem_npes_example.c} -% {} - -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_add.tex b/_deprecated_sources/TEX_FILES/shmem_add.tex deleted file mode 100644 index d4a18683b..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_add.tex +++ /dev/null @@ -1,43 +0,0 @@ -\bAPI{SHMEM\_ADD}{Performs an atomic add operation on a remote symmetric data object.} -\synC -void shmem_int_add(int *dest, int value, int pe); -void shmem_long_add(long *dest, long value, int pe); -void shmem_longlong_add(long long *dest, long long value, int pe); %*\synCE -\synF -INTEGER pe -INTEGER*4 value_i4 -CALL SHMEM_INT4_ADD(dest, value_i4, pe) -INTEGER*8 value_i8 -CALL SHMEM_INT8_ADD(dest, value_i8, pe) %*\synFE - -%DESCRIPTION - -%Arguments -\desB{ - \argRow{OUT}{dest}{The remotely accessible integer data object to be updated on the remote \ac{PE}. If you are using \CorCpp, the type of \dest{} should match that implied in the SYNOPSIS section.} - \argRow{IN}{value}{The value to be atomically added to \dest. If you are using \CorCpp, the type of \VAR{value} should match that implied in the SYNOPSIS section. If you are using \Fortran, it must be of type integer with an element size of \dest.} - \argRow{IN}{pe}{An integer that indicates the \ac{PE} number upon which \dest{} is to be updated. If you are using \Fortran, it must be a default integer value.} -} -%API Description -{The \FUNC{shmem\_add} routine performs an atomic add operation. It adds \VAR{value} - to \dest{} on \ac{PE} \VAR{pe} and atomically updates the \dest{} without returning the value. 
- } -{ - \hfill \\ -\desTB {If you are using \Fortran, \VAR{dest} must be of the following type:} -{ -\cRow{SHMEM\_INT4\_ADD}{\CONST{4}-byte integer} -\cRow{SHMEM\_INT8\_ADD}{\CONST{8}-byte integer} -} - \desR{None.} - \notesB{The term remotely accessible is defined in the Introduction.} -} - -\exampleB{ - \exampleITEM - {} - {./EXAMPLES/shmem_add_example.c} - {} -} - -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_addr_accessible.tex b/_deprecated_sources/TEX_FILES/shmem_addr_accessible.tex deleted file mode 100644 index 7f1d5f720..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_addr_accessible.tex +++ /dev/null @@ -1,64 +0,0 @@ -\bAPI{SHMEM\_ADDR\_ACCESSIBLE}{Determines whether an address is accessible via OpenSHMEM data transfer routines from the specified remote \ac{PE}.} -%SYNOPSIS -\synC -int shmem_addr_accessible(void *addr, int pe); %*\synCE - -\synF -LOGICAL LOG, SHMEM_ADDR_ACCESSIBLE -INTEGER pe -LOG = SHMEM_ADDR_ACCESSIBLE(addr, pe) %*\synFE - -%DESCRIPTION - -%Arguments -\desB{ - \argRow{IN}{addr}{Data object on the local \ac{PE}.} - \argRow{IN}{pe}{Integer id of a remote \ac{PE}.} -} -%API Description -{ - \FUNC{shmem\_addr\_accessible} is a query routine that indicates whether a - local address is accessible via \openshmem routines from the specified - remote \ac{PE}. - - This routine verifies that the data object is symmetric and accessible - with respect to a remote \ac{PE} via \openshmem data transfer routines. The - specified address \VAR{addr} is a data object on the local \ac{PE}. - - This routine may be particularly useful for hybrid programming with - other communication libraries (such as \ac{MPI}) or parallel languages. 
- For example, in SGI Altix series systems, for multiple executable MPI programs - that use \openshmem routines, it is important to note that static memory, - such as a \Fortran{} common block or \Clang{} global variable, is symmetric - between processes running from the same executable file, but is not - symmetric between processes running from different executable files. - Data allocated from the symmetric heap (\FUNC{shmem\_malloc} or \FUNC{shpalloc}) is - symmetric across the same or different executable files. -} -%API Description Table -{ -% \desTB{} -% { -% \cRow{}{} -% } - %Return Value - \desR{ - \CorCpp: The return value is \CONST{1} if \VAR{addr} is a symmetric data object and - accessible via \openshmem routines from the specified remote \ac{PE}; - otherwise, it is \CONST{0}. - - \Fortran: The return value is \CONST{.TRUE.} if \VAR{addr} is a symmetric data object - and accessible via \openshmem routines from the specified remote - \ac{PE}; otherwise, it is \CONST{.FALSE.}. - } - %NOTES - \notesB{None.} -} -%%EXAMPLES -%\exampleB{ -% \exampleITEM -% {TEST} -% {./EXAMPLES/shmem_npes_example.c} -% {} -%} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_barrier.tex b/_deprecated_sources/TEX_FILES/shmem_barrier.tex deleted file mode 100644 index b82f64221..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_barrier.tex +++ /dev/null @@ -1,57 +0,0 @@ -\bAPI{SHMEM\_BARRIER}{Performs all operations described in the \FUNC{shmem\_barrier\_all} interface but with respect to a subset of \acp{PE} defined by the \activeset{}.} %Eric: Wish list: Add a ~/ref{} to barrier_all API. -\synC -void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync); %*\synCE -\synF -INTEGER PE_start, logPE_stride, PE_size -INTEGER pSync(SHMEM_BARRIER_SYNC_SIZE) -CALL SHMEM_BARRIER(PE_start, logPE_stride, PE_size, pSync) %*\synFE - -%DESCRIPTION - -%Arguments -\desB{ - \argRow{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of \ac{PE}s. 
\VAR{PE\_start} must be of type integer. If you are using \Fortran, it must be a default integer value.} - \argRow{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of type integer. If you are using \Fortran, it must be a default integer value.} - \argRow{IN}{PE\_size}{The number of \ac{PE}s in the \activeset. \VAR{PE\_size} must be of type integer. If you are using \Fortran, it must be a default integer value.} - \argRow{IN}{pSync}{ A symmetric work array. In \CorCpp, \VAR{pSync} must be of type long and size \CONST{\_SHMEM\_BARRIER\_SYNC\_SIZE}. In \Fortran, \VAR{pSync} must be of type integer and size \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}. If you are using \Fortran, it must be a default integer type. Every element of this array must be initialized to \CONST{SHMEM\_SYNC\_VALUE} before any of the \ac{PE}s in the \activeset{} enter \FUNC{shmem\_barrier} the first time.} -} -%API Description -{ - \FUNC{shmem\_barrier} is a collective synchronization routine over an \activeset. Control returns from \FUNC{shmem\_barrier} after all \ac{PE}s in the \activeset{} (specified by - \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}) have called \FUNC{shmem\_barrier}. - - As with all \openshmem collective routines, each of these routines assumes - that only \ac{PE}s in the \activeset{} call the routine. If a \ac{PE} not in the - \activeset{} calls an \openshmem collective routine, undefined behavior results. - - The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \ac{PE}s in the \activeset. The same work array must be passed - in \VAR{pSync} to all \ac{PE}s in the \activeset. - - \FUNC{shmem\_barrier} ensures that all previously issued stores and - remote memory updates, including \acp{AMO} and \ac{RMA} operations, - done by any of the \ac{PE}s in the \activeset{} are complete before returning. 
-% For example, \FUNC{shmem\_put} and \FUNC{shmem\_int\_add} - - The same \VAR{pSync} array may be reused on consecutive calls to - \FUNC{shmem\_barrier} if the same active \ac{PE} set is used. -} -%API Description Table -{ - %Return Values - \desR{None.} - %NOTES - \notesB{ - If the \VAR{pSync} array is initialized at run time, be sure to use some type of synchronization, for example, a call to \FUNC{shmem\_barrier\_all}, before calling \FUNC{shmem\_barrier} for the first time. - - If the \activeset{} does not change, \FUNC{shmem\_barrier} can be called repeatedly with the same \VAR{pSync} array. No additional synchronization beyond that implied by \FUNC{shmem\_barrier} itself is necessary in this case. - } -} -%EXAMPLES -\exampleB{ - \exampleITEM - {The following barrier example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_barrier_example.c} - {} -} - -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_barrier_all.tex b/_deprecated_sources/TEX_FILES/shmem_barrier_all.tex deleted file mode 100644 index adc6cdb3a..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_barrier_all.tex +++ /dev/null @@ -1,85 +0,0 @@ -\bAPI{SHMEM\_BARRIER\_ALL}{Registers the arrival of a \ac{PE} at a barrier and suspends \ac{PE} execution until all other \ac{PE}s arrive at the barrier and all local and remote memory updates are completed.} -\synC %Synopisis for C API - -void shmem_barrier_all(void);%*\synCE - -\synF %Synopsis for FORTRAN API - -CALL SHMEM_BARRIER_ALL%*\synFE - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{None.}{}{} -} -%API description -{ - The \FUNC{shmem\_barrier\_all} routine registers the arrival of a \ac{PE} at a - barrier. Barriers are a fast mechanism for synchronizing all \ac{PE}s at - once. This routine causes a \ac{PE} to suspend execution until all \ac{PE}s have - called \FUNC{shmem\_barrier\_all}. This routine must be used with \ac{PE}s started - by \FUNC{shmem\_init}. 
- - Prior to synchronizing with other \ac{PE}s, \FUNC{shmem\_barrier\_all} ensures - completion of all previously issued memory stores and remote - memory updates issued via \openshmem{} \acp{AMO} and \ac{RMA} routine calls such as \FUNC{shmem\_int\_add} and \FUNC{shmem\_put32}. -} -{ -%API Description Table. -\desR{ - %Return Values - {None.} -} -% Notes. If there are no notes, this field can be left empty. -\notesB{None.} -} -%Example -\exampleB{ - %For each example, you can enter it as an item. - \exampleITEM - { The following \FUNC{shmem\_barrier\_all} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_barrierall_example.c} - {} -} -\eAPI -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% Registers the arrival of a processing element (PE) at a barrier and suspends PE execution until all other PEs arrive at the barrier and all local and remote memory updates are completed. -% -% SYNOPSIS -% C or C++: -% -% void barrier(void); -% -% void shmem_barrier_all(void); -% -% Fortran: -% -% CALL BARRIER -% -% CALL SHMEM_BARRIER_ALL -% -% DESCRIPTION -% -% Arguments -% None. -% -% API Description -% -% The shmem_barrier_all function registers the arrival of a PE at a -% barrier. Barriers are a fast mechanism for synchronizing all PEs at -% once. This routine causes a PE to suspend execution until all PEs have -% called shmem_barrier_all. This function must be used with PEs started -% by start_pes(). -% -% Prior to synchronizing with other PEs, shmem_barrier_all ensures -% completion of all previously issued local memory stores and remote -% memory updates issued via shared memory routine calls such as -% shmem_put32(). -% -% Return Value -% None. 
-% -% EXAMPLES -% The following shmem_barrier_all example is for C/C++ programs: -% -% \lstinputlisting[language=C]{shmem_barrierall_example.c} diff --git a/_deprecated_sources/TEX_FILES/shmem_broadcast.tex b/_deprecated_sources/TEX_FILES/shmem_broadcast.tex deleted file mode 100644 index a27be099d..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_broadcast.tex +++ /dev/null @@ -1,284 +0,0 @@ -\bAPI{SHMEM\_BROADCAST}{Broadcasts a block of data from one \ac{PE} to one or more destination \ac{PE}s.} -\synC %Synopisis for C API - -void shmem_broadcast32(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_broadcast64(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER nelems, PE_root, PE_start, logPE_stride, PE_size -INTEGER pSync(SHMEM_BCAST_SYNC_SIZE) -CALL SHMEM_BROADCAST4(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_BROADCAST8(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_BROADCAST32(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size,pSync) -CALL SHMEM_BROADCAST64(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size,pSync)%*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ -\argRow{OUT}{dest}{A symmetric data object.} -\argRow{IN}{source}{A symmetric data object that can be of any data type that is permissible for the \dest{} argument.} -\argRow{IN}{nelems}{The number of elements in \source. For \FUNC{shmem\_broadcast32} and \FUNC{shmem\_broadcast4}, this is the number of 32-bit - halfwords. nelems must be of type \VAR{size\_t} in \Clang. 
If you are using \Fortran, it must be a default integer value.} -\argRow{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to the \activeset, - from which the data is copied. Must be greater than - or equal to 0 and less than \VAR{PE\_size}. \VAR{PE\_root} must be of - type integer. If you are using \Fortran, it must be a default integer value.} -\argRow{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of \ac{PE}s. - \VAR{PE\_start} must be of type integer. If you are using - \Fortran, it must be a default integer value.} -\argRow{IN}{logPE\_stride}{ - The log (base 2) of the stride between consecutive - \ac{PE} numbers in the \activeset. \VAR{log\_PE\_stride} must be of - type integer. If you are using \Fortran, it must be a - default integer value.} - -\argRow{IN}{PE\_size}{ - The number of \ac{PE}s in the \activeset. \VAR{PE\_size} must be of - type integer. If you are using \Fortran, it must be a - default integer value.} - -\argRow{IN}{pSync}{ - A symmetric work array. In \CorCpp, \VAR{pSync} must be of type - long and size \CONST{\_SHMEM\_BCAST\_SYNC\_SIZE}. In \Fortran, \VAR{pSync} must be of type integer and size \CONST{SHMEM\_BCAST\_SYNC\_SIZE}. - Every element of this array must be initialized with the - value \CONST{\_SHMEM\_SYNC\_VALUE} (in \CorCpp) or \CONST{SHMEM\_SYNC\_VALUE} (in \Fortran) before any of the \ac{PE}s in the \activeset{} enter - \FUNC{shmem\_broadcast}.} -} -%API description -{ -\openshmem broadcast routines are collective routines. -They copy data object \source{} on the processor specified by \VAR{PE\_root} and -store the values at \dest{} on the other \ac{PE}s specified by the triplet -\VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size}. The data is not copied to the \dest{} area on the root \ac{PE}. - -As with all \openshmem collective routines, each of these routines assumes -that only \ac{PE}s in the \activeset{} call the routine. 
If a \ac{PE} not in the -\activeset{} calls an \openshmem collective routine, undefined behavior -results. - -The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \ac{PE}s in the \activeset. The same \dest{} and \source{} data objects and the same \VAR{pSync} work array must be passed to all \ac{PE}s in the \activeset. - -Before any \ac{PE} calls a broadcast routine, you must ensure that the -following conditions exist (synchronization via a barrier or some other -method is often needed to ensure this): The \VAR{pSync} array on all \ac{PE}s in -the \activeset{} is not still in use from a prior call to a broadcast -routine. The \dest{} array on all \ac{PE}s in the \activeset{} is ready to -accept the broadcast data. - -Upon return from a broadcast routine, the following are true for the -local \ac{PE}: If the current \ac{PE} is not the root \ac{PE}, the \dest{} data object -is updated. The values in the \VAR{pSync} array are restored to the original -values. -} -%API Description Tabl -{ -\desTB { -\hfill \\ -The \dest{} and \source{} data objects must conform to certain typing -constraints, which are as follows: } -{ -\cRow{shmem\_broadcast8, shmem\_broadcast64}{Any noncharacter type that has an - element size of \CONST{64} bits. No \Fortran{} - derived types or \CorCpp{} structures are - allowed.} -\cRow{shmem\_broadcast32}{Any noncharacter type that has an - element size of \CONST{32} bits. No \Fortran{} - derived types or \CorCpp{} structures are - allowed.} -\cRow{shmem\_broadcast4}{Any noncharacter type that has an - element size of \CONST{32} bits.} -} -%Return Values -\desR{None.} -% Notes. If there are no notes, this field can be left empty. -\notesB{ - All \openshmem broadcast routines restore \VAR{pSync} to its original contents. - Multiple calls to \openshmem routines that use the same \VAR{pSync} array do not - require that \VAR{pSync} be reinitialized after the first call. 
- - You must ensure that the \VAR{pSync} array is not being updated by any \ac{PE} - in the \activeset{} while any of the \ac{PE}s participates in processing of an - \openshmem broadcast routine. Be careful to avoid these situations: If the - \VAR{pSync} array is initialized at run time, some type of synchronization is - needed to ensure that all \ac{PE}s in the \activeset{} have initialized \VAR{pSync} - before any of them enter an \openshmem routine called with the \VAR{pSync} - synchronization array. A \VAR{pSync} array may be reused on a subsequent - \openshmem broadcast routine only if none of the \ac{PE}s in the \activeset{} are - still processing a prior \openshmem broadcast routine call that used the - same \VAR{pSync} array. In general, this can be ensured only by doing some - type of synchronization. -% However, in the special case of \openshmem -% routines being called with the same \activeset, you can allocate two -% \VAR{pSync} arrays and alternate between them on successive calls. -} -} -%Example -\exampleB{ - %For each example, you can enter it as an item. - \exampleITEM - {In the following examples, the call to \FUNC{shmem\_broadcast64} copies \source{} - on \ac{PE} 4 to \dest{} on \ac{PE}s 5, 6, and 7. - - \CorCpp{} example:} - {./EXAMPLES/shmem_broadcast_example.c} - {} - \exampleITEMF - {\Fortran{} example:} - {./EXAMPLES/shmem_broadcast_example.f90} - {} -} -\eAPI - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% Broadcasts a block of data from one processing element (PE) to one or more target PEs.
-% -% SYNOPSIS -% C or C++: -% -% void shmem_broadcast32(void *target, const void *source, size_t -% nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, -% long *pSync); -% -% void shmem_broadcast64(void *target, const void *source, size_t -% nlong, int PE_root, int PE_start, int logPE_stride, int PE_size, -% long *pSync); -% -% Fortran: -% -% INTEGER nlong, PE_root, PE_start, logPE_stride, PE_size -% -% INTEGER pSync(SHMEM_BCAST_SYNC_SIZE) -% -% CALL SHMEM_BROADCAST4(target, source, nlong, PE_root, PE_start, -% logPE_stride, PE_size, fIpSync) -% -% CALL SHMEM_BROADCAST8(target, source, nlong, PE_root, PE_start, -% logPE_stride, PE_size, pSync) -% -% CALL SHMEM_BROADCAST32(target, source, nlong, PE_root, PE_start, -% logPE_stride, PE_size, pSync) -% -% CALL SHMEM_BROADCAST64(target, source, nlong, PE_root, PE_start, -% logPE_stride, PE_size, pSync) -% -% DESCRIPTION -% -% Arguments -% -% OUT target A symmetric data object with one of the following data -% types: -% -% Routine Data Type and Language -% -% shmem_broadcast8, -% shmem_broadcast64 Any noncharacter type that has an -% element size of 64 bits. No Fortran -% derived types or C/C++ structures are -% allowed. -% -% shmem_broadcast32 Any noncharacter type that has an -% element size of 32 bits. No Fortran -% derived types or C/C++ structures are -% allowed. -% -% shmem_broadcast4 Any noncharacter type that has an -% element size of 32 bits. -% -% IN source A symmetric data object that can be of any data type that -% is permissible for the target argument. -% -% IN nlong The number of elements in source. For shmem_broadcast32 -% and shmem_broadcast4, this is the number of 32-bit -% halfwords. nlong must be of type integer. If you are -% using Fortran, it must be a default integer value. -% -% IN PE_root Zero-based ordinal of the PE, with respect to the active -% set, from which the data is copied. Must be greater than -% or equal to 0 and less than PE_size. PE_root must be of -% type integer. 
If you are using Fortran, it must be a -% default integer value. -% -% IN PE_start The lowest PE number of the active set of PEs. -% PE_start must be of type integer. If you are using -% Fortran, it must be a default integer value. -% -% IN logPE_stride -% The log (base 2) of the stride between consecutive -% PE numbers in the active set. log_PE_stride must be of -% type integer. If you are using Fortran, it must be a -% default integer value. -% -% IN PE_size The number of PEs in the active set. PE_size must be of -% type integer. If you are using Fortran, it must be a -% default integer value. -% -% IN pSync A symmetric work array. In C/C++, pSync must be of type -% long and size _SHMEM_BCAST_SYNC_SIZE. In Fortran, pSync -% must be of type integer and size SHMEM_BCAST_SYNC_SIZE. -% Every element of this array must be initialized with the -% value _SHMEM_SYNC_VALUE (in C/C++) or SHMEM_SYNC_VALUE (in -% Fortran) before any of the PEs in the active set enter -% shmem_barrier(). -% -% API Description -% -% OpenSHMEM broadcast routines are collective routines. -% They copy data object source on the processor specified by PE_root and -% store the values at target on the other PEs specified by the triplet -% PE_start, logPE_stride, PE_size. The data is not copied to the target -% area on the root PE. -% -% As with all OpenSHMEM collective routines, each of these routines assumes -% that only PEs in the active set call the routine. If a PE not in the -% active set calls a OpenSHMEM collective routine, undefined behavior -% results. -% -% The values of arguments PE_root, PE_start, logPE_stride, and PE_size -% must be equal on all PEs in the active set. The same target and source -% data objects and the same pSync work array must be passed to all PEs in -% the active set. 
-% -% Before any PE calls a broadcast routine, you must ensure that the -% following conditions exist (synchronization via a barrier or some other -% method is often needed to ensure this): The pSync array on all PEs in -% the active set is not still in use from a prior call to a broadcast -% routine. The target array on all PEs in the active set is ready to -% accept the broadcast data. -% -% Upon return from a broadcast routine, the following are true for the -% local PE: If the current PE is not the root PE, the target data object -% is updated. The values in the pSync array are restored to the original -% values. -% -% Return Value -% -% None. -% -% NOTES -% -% All OpenSHMEM broadcast routines restore pSync to its original contents. -% Multiple calls to OpenSHMEM routines that use the same pSync array do not -% require that pSync be reinitialized after the first call. -% -% You must ensure that the pSync array is not being updated by any PE -% in the active set while any of the PEs participates in processing of an -% OpenSHMEM broadcast routine. Be careful to avoid these situations: If the -% pSync array is initialized at run time, some type of synchronization is -% needed to ensure that all PEs in the working set have initialized pSync -% before any of them enter a SHMEM routine called with the pSync -% synchronization array. A pSync array may be reused on a subsequent -% OpenSHMEM broadcast routine only if none of the PEs in the active set are -% still processing a prior OpenSHMEM broadcast routine call that used the -% same pSync array. In general, this can be ensured only by doing some -% type of synchronization. However, in the special case of SHMEM -% routines being called with the same active set, you can allocate two -% pSync arrays and alternate between them on successive calls. -% -% EXAMPLES -% -% In the following examples, the call to shmem_broadcast64 copies source -% on PE 4 to target on PEs 5, 6, and 7.
-% -% \lstinputlisting[language=C]{shmem_broadcast_example.c} -% -% \lstinputlisting[language=C]{shmem_broadcast_example.f90} diff --git a/_deprecated_sources/TEX_FILES/shmem_cache.tex b/_deprecated_sources/TEX_FILES/shmem_cache.tex deleted file mode 100644 index d59f03314..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_cache.tex +++ /dev/null @@ -1,118 +0,0 @@ -\bAPI{SHMEM\_CACHE}{Controls data cache utilities.} -\synC %Synopisis for C API - -void shmem_clear_cache_inv(void); -void shmem_set_cache_inv(void); -void shmem_clear_cache_line_inv(void *dest); -void shmem_set_cache_line_inv(void *dest); -void shmem_udcflush(void); -void shmem_udcflush_line(void *dest); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -CALL SHMEM_CLEAR_CACHE_INV -CALL SHMEM_SET_CACHE_INV -CALL SHMEM_SET_CACHE_LINE_INV(dest) -CALL SHMEM_UDCFLUSH -CALL SHMEM_UDCFLUSH_LINE(dest) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ -\argRow{IN}{dest}{A data object that is local to the \ac{PE}. - \VAR{dest} can be of any noncharacter type. If you are using - \Fortran, it can be of any kind.} -} -%API description -{ -\FUNC{shmem\_set\_cache\_inv} enables automatic cache coherency mode. - -\FUNC{shmem\_set\_cache\_line\_inv} enables automatic cache coherency mode for the cache line associated with the address of \VAR{dest} only. - -%IF THIS LINE IS CHANGED, PLEASE MAKE SURE \FUNC{shmem\_set\_cache\ \_inv} DOES NOT SHOW A VISIBLE SPACE BETWEEN cache and _inv. -\FUNC{shmem\_clear\_cache\_inv} disables automatic cache coherency mode -previously enabled by \FUNC{shmem\_set\_cache\ \_inv} or \FUNC{shmem\_set\_cache\_line\_inv}. - -\FUNC{shmem\_udcflush} makes the entire user data cache coherent. - -\FUNC{shmem\_udcflush\_line} makes coherent the cache line that corresponds with -the address specified by \VAR{dest}. -} -%API Description Tabl -{ -%Return Values -\desR{None.} -% Notes. 
If there are no notes, this field can be left empty. -\notesB{ -These routines have been retained for improved backward compatibility -with legacy architectures. They are not required to be supported by implementing them as \VAR{no-ops} and -where used, they may have no effect on cache line states. -} -} -%Example -\exampleB{ - %For each example, you can enter it as an item. -None. -} -\eAPI - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Controls data cache utilities. -% -% SYNOPSIS -% C or C++: -% -% void shmem_clear_cache_inv(void); -% -% void shmem_set_cache_inv(void); -% -% void shmem_clear_cache_line_inv(void *target); -% -% void shmem_set_cache_line_inv(void *target); -% -% void shmem_udcflush(void); -% -% void shmem_udcflush_line(void *target); -% -% Fortran: -% -% CALL SHMEM_CLEAR_CACHE_INV -% -% CALL SHMEM_SET_CACHE_INV -% -% CALL SHMEM_SET_CACHE_LINE_INV(target) -% -% CALL SHMEM_UDCFLUSH -% -% CALL SHMEM_UDCFLUSH_LINE(target) -% -% DESCRIPTION -% -% Arguments -% -% IN target A data object that is local to the processing element (PE). -% target can be of any noncharacter type. If you are using -% Fortran, it can be of any kind. -% -% API Description -% -% shmem_set_cache_inv enables automatic cache coherency mode. -% -% shmem_set_cache_line_inv enables automatic cache coherency mode for the -% cache line associated with the address of target only. -% -% shmem_clear_cache_inv disables automatic cache coherency mode -% previously enabled by shmem_set_cache_inv or shmem_set_cache_line_inv. -% -% shmem_udcflush makes the entire user data cache coherent. -% -% shmem_udcflush_line makes coherent the cache line that corresponds with -% the address specified by target. -% -% Return Value -% -% None. -% -% NOTES -% These routines have been retained for improved backward compatability -% with legacy architectures. They are not required to be supported and where -% provided they may have no effect on cacheline states. 
-% diff --git a/_deprecated_sources/TEX_FILES/shmem_collect.tex b/_deprecated_sources/TEX_FILES/shmem_collect.tex deleted file mode 100644 index 8ae1354cd..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_collect.tex +++ /dev/null @@ -1,272 +0,0 @@ -\bAPI{SHMEM\_COLLECT, SHMEM\_FCOLLECT}{Concatenates blocks of data from multiple \ac{PE}s to an array in every \ac{PE}.} -\label{subsec:shmem_collect} -\synC %Synopisis for C API - -void shmem_collect32(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_collect64(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_fcollect32(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_fcollect64(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER nelems -INTEGER PE_start, logPE_stride, PE_size -INTEGER pSync(SHMEM_COLLECT_SYNC_SIZE) -CALL SHMEM_COLLECT4(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_COLLECT8(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_COLLECT32(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_COLLECT64(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT4(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT8(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT32(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT64(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ -\argRow{OUT}{dest}{A symmetric array. 
The \dest{} argument must be large - enough to accept the concatenation of the \source{} arrays on - all \ac{PE}s. The data types are as follows: - For \FUNC{shmem\_collect8}, \FUNC{shmem\_collect64}, \FUNC{shmem\_fcollect8}, and \FUNC{shmem\_fcollect64}, any data type with an element size of 64 - bits. \Fortran{} derived types, \Fortran{} character type, and - \CorCpp{} structures are not permitted. For \FUNC{shmem\_collect4}, - \FUNC{shmem\_collect32}, \FUNC{shmem\_fcollect4}, and \FUNC{shmem\_fcollect32}, any data type with an element size of \CONST{32} bits. \Fortran{} - derived types, \Fortran{} character type, and \CorCpp{} structures are not permitted.} -\argRow{IN}{source}{A symmetric data object that can be of any type - permissible for the \dest{} argument.} -\argRow{IN}{nelems}{The number of elements in the \source{} array. \VAR{nelems} must be - of type \VAR{size\_t} for \Clang. If you are using \Fortran, it must be a - default integer value.} -\argRow{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of \ac{PE}s. - \VAR{PE\_start} must be of type integer. If you are using - \Fortran, it must be a default integer value.} -\argRow{IN}{logPE\_stride}{The log (base \CONST{2}) of the stride between consecutive \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of - type integer. If you are using \Fortran, it must be a default integer value.} -\argRow{IN}{PE\_size}{The number of \ac{PE}s in the \activeset. \VAR{PE\_size} must be of type integer. If you are using \Fortran, it must be a default integer value.} -\argRow{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must be of type - long and size \CONST{\_SHMEM\_COLLECT\_SYNC\_SIZE}. In \Fortran, \VAR{pSync} - must be of type integer and size \CONST{SHMEM\_COLLECT\_SYNC\_SIZE}. - If you are using \Fortran, it must be a default integer - value. 
Every element of this array must be initialized - with the value \CONST{\_SHMEM\_SYNC\_VALUE} in \CorCpp{} or - \CONST{SHMEM\_SYNC\_VALUE} in \Fortran{} before any of the \ac{PE}s in the \activeset{} enter \FUNC{shmem\_collect} or \FUNC{shmem\_fcollect}.} -} -%API description -{ -\OSH{} \FUNC{collect} and \FUNC{fcollect} routines concatenate -\VAR{nelems} \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the \dest{} array, over the set of \ac{PE}s defined by \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}, in processor number order. The resultant \dest{} array -contains the contribution from \ac{PE} \VAR{PE\_start} first, then the contribution -from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. The collected result is written to the \dest{} array for all \ac{PE}s in the \activeset. - -The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all -participating \ac{PE}s, while the \FUNC{collect} routines allow \VAR{nelems} to vary from -\ac{PE} to \ac{PE}. - -As with all \openshmem collective routines, each of these routines assumes -that only \ac{PE}s in the \activeset{} call the routine. If a \ac{PE} not in the -\activeset{} calls this collective routine, the behavior is undefined. - -The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \ac{PE}s in the \activeset. The same \dest{} and \source{} arrays -and the same \VAR{pSync} work array must be passed to all \ac{PE}s in the \activeset. - -Upon return from a collective routine, the following are true for the -local \ac{PE}: The \dest{} array is updated. The values in the \VAR{pSync} array -are restored to the original values. -} -{ -{ -%Return Values -\desR{None.} -} -% Notes. If there are no notes, this field can be left empty.
-\notesB{ -All \openshmem collective routines reset the values in \VAR{pSync} before they -return, so a particular \VAR{pSync} buffer need only be initialized the first -time it is used. - -You must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} in -the \activeset{} while any of the \ac{PE}s participate in processing of an -\openshmem collective routine. Be careful to avoid these situations: If the -\VAR{pSync} array is initialized at run time, some type of synchronization is -needed to ensure that all \ac{PE}s in the \activeset{} have initialized \VAR{pSync} -before any of them enter an \openshmem routine called with the \VAR{pSync} -synchronization array. A \VAR{pSync} array can be reused on a subsequent -\openshmem collective routine only if none of the \ac{PE}s in the \activeset{} are -still processing a prior \openshmem collective routine call that used the -same \VAR{pSync} array. In general, this may be ensured only by doing some -type of synchronization. -%However, in the special case of \openshmem -%routines being called with the same \activeset, you can allocate two -%\VAR{pSync} arrays and alternate between them on successive calls. - -The collective routines operate on active \ac{PE} sets that have a -non-power-of-two \VAR{PE\_size} with some performance degradation. They -operate with no performance degradation when \VAR{nelems} is a -non-power-of-two value. -} -} -%Example -\exampleB{ - \exampleITEM{The following \FUNC{shmem\_collect} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_collect_example.c} - {} - %For each example, you can enter it as an item. - \exampleITEMF{The following \FUNC{SHMEM\_COLLECT} example is for \Fortran{} programs:} - {./EXAMPLES/shmem_collect_example.f90} - {} -} -\eAPI - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Concatenates blocks of data from multiple processing -% elements (PEs) to an array in every PE.
-% -% SYNOPSIS -% C or C++: -% -% void shmem_collect32(void *target, const void *source, size_t nelems, -% int PE_start, int logPE_stride, int PE_size, long *pSync); -% -% void shmem_collect64(void *target, const void *source, size_t nelems, -% int PE_start, int logPE_stride, int PE_size, long *pSync); -% -% void shmem_fcollect32(void *target, const void *source, size_t -% nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -% -% void shmem_fcollect64(void *target, const void *source, size_t -% nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -% -% Fortran: -% -% INTEGER nelems -% INTEGER PE_start, logPE_stride, PE_size -% INTEGER pSync(SHMEM_COLLECT_SYNC_SIZE) -% -% CALL SHMEM_COLLECT4(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% CALL SHMEM_COLLECT8(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% CALL SHMEM_COLLECT32(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% CALL SHMEM_COLLECT64(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% CALL SHMEM_FCOLLECT4(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% CALL SHMEM_FCOLLECT8(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% CALL SHMEM_FCOLLECT32(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% CALL SHMEM_FCOLLECT64(target, source, nelems, PE_start, logPE_stride, -% PE_size, pSync) -% -% DESCRIPTION -% -% Arguments -% -% OUT target A symmetric array. The target argument must be large -% enough to accept the concatenation of the source arrays on -% all PEs. The data types are as follows: -% For shmem_collect8, shmem_collect64, shmem_fcollect8, and -% shmem_fcollect64, any data type with an element size of 64 -% bits. Fortran derived types, Fortran character type, and -% C/C++ structures are not permitted. 
For shmem_collect4, -% shmem_collect32, shmem_fcollect4, and shmem_fcollect32, -% any data type with an element size of 32 bits. Fortran -% derived types, Fortran character type, and C/C++ -% structures are not permitted. -% -% IN source A symmetric data object that can be of any type -% permissible for the target argument. -% -% IN nelems The number of elements in the source array. nelems must be -% of type integer. If you are using Fortran, it must be a -% default integer value. -% -% IN PE_start The lowest PE number of the active set of PEs. -% PE_start must be of type integer. If you are using -% Fortran, it must be a default integer value. -% -% IN logPE_stride The log (base 2) of the stride between consecutive -% PE numbers in the active set. logPE_stride must be of -% type integer. If you are using Fortran, it must be a -% default integer value. -% -% IN PE_size The number of PEs in the active set. PE_size must be of -% type integer. If you are using Fortran, it must be a -% default integer value. -% -% IN pSync A symmetric work array. In C/C++, pSync must be of type -% long and size _SHMEM_COLLECT_SYNC_SIZE. In Fortran, pSync -% must be of type integer and size SHMEM_COLLECT_SYNC_SIZE. -% If you are using Fortran, it must be a default integer -% value. Every element of this array must be initialized -% with the value _SHMEM_SYNC_VALUE in C/C++ or -% SHMEM_SYNC_VALUE in Fortran before any of the PEs in the -% active set enter shmem_barrier(). -% -% API Description -% -% OpenSHMEM collect and fcollect routines concatenate -% nelems 64-bit or 32-bit data items from the source array into the target -% array, over the set of PEs defined by PE_start, log2PE_stride, and -% PE_size, in processor number order. The resultant target array -% contains the contribution from PE PE_start first, then the contribution -% from PE PE_start + PE_stride second, and so on. The collected result -% is written to the target array for all PEs in the active set. 
-% -% The fcollect routines require that nelems be the same value in all -% participating PEs, while the collect routines allow nelems to vary from -% PE to PE. -% -% As with all OpenSHMEM collective routines, each of these routines assumes -% that only PEs in the active set call the routine. If a PE not in the -% active set calls a OpenSHMEM collective routine, undefined behavior -% results. -% -% The values of arguments PE_start, logPE_stride, and PE_size must be -% equal on all PEs in the active set. The same target and source arrays -% and the same pSync work array must be passed to all PEs in the active -% set. -% -% Upon return from a collective routine, the following are true for the -% local PE: The target array is updated. The values in the pSync array -% are restored to the original values. -% -% Return Value -% -% None. -% -% NOTES -% -% All OpenSHMEM collective routines reset the values in pSync before they -% return, so a particular pSync buffer need only be initialized the first -% time it is used. -% -% You must ensure that the pSync array is not being updated on any PE in -% the active set while any of the PEs participate in processing of a -% OpenSHMEM collective routine. Be careful to avoid these situations: If the -% pSync array is initialized at run time, some type of synchronization is -% needed to ensure that all PEs in the working set have initialized pSync -% before any of them enter a OpenSHMEM routine called with the pSync -% synchronization array. A pSync array can be reused on a subsequent -% OpenSHMEM collective routine only if none of the PEs in the active set are -% still processing a prior OpenSHMEM collective routine call that used the -% same pSync array. In general, this may be ensured only by doing some -% type of synchronization. However, in the special case of SHMEM -% routines being called with the same active set, you can allocate two -% pSync arrays and alternate between them on successive calls. 
-% -% The collective routines operate on active PE sets that have a -% non-power-of-two PE_size with some performance degradation. They -% operate with no performance degradation when nelems is a -% non-power-of-two value. -% -% EXAMPLES -% \lstinputlisting[language=C]{shmem_collect_example.c} -% -% \lstinputlisting[language=C]{shmem_collect_example.f90} diff --git a/_deprecated_sources/TEX_FILES/shmem_cswap.tex b/_deprecated_sources/TEX_FILES/shmem_cswap.tex deleted file mode 100644 index 08387c4f5..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_cswap.tex +++ /dev/null @@ -1,134 +0,0 @@ -\bAPI{SHMEM\_CSWAP}{Performs an atomic conditional swap on a remote data object.} -\synC %Synopisis for C API - -int shmem_int_cswap(int *dest, int cond, int value, int pe); -long shmem_long_cswap(long *dest, long cond, long value, int pe); -long shmem_longlong_cswap(long long *dest, long long cond, long long value, int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER pe -INTEGER*4 SHMEM_INT4_CSWAP, cond_i4, value_i4, ires_i4 -ires_i4 = SHMEM_INT4_CSWAP(dest, cond_i4, value_i4, pe) -INTEGER*8 SHMEM_INT8_CSWAP, cond_i8, value_i8, ires_i8 -ires_i8 = SHMEM_INT8_CSWAP(dest, cond_i8, value_i8, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ -\argRow{OUT}{dest}{The remotely accessible integer data object to be updated on - the remote \ac{PE}. } - % If you are using C/C++, the data type of - % dest should match that implied in the SYNOPSIS section.} -\argRow{IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} value. If \VAR{cond} and the - remote \VAR{dest} are equal, then \VAR{value} is swapped into the - remote \VAR{dest}. Otherwise, the remote \VAR{dest} is unchanged. - In either case, the old value of the remote \VAR{dest} is - returned as the routine return value. 
\VAR{cond} must be of the - same data type as \VAR{dest}.} -\argRow{IN}{value}{The value to be atomically written to the remote \ac{PE}. \VAR{value} must be the same data type as \VAR{dest}.} -\argRow{IN}{pe}{An integer that indicates the \ac{PE} number upon which \VAR{dest} is to be updated. If you are using \Fortran, it must be a default integer value.} -} -%API description -{ -The conditional swap routines conditionally update a \VAR{dest} data object -on the specified \ac{PE} and return the prior contents of the data object in one atomic operation. -} -%API Description Table -{ -\hfill \\ -\desTB {The \VAR{dest} and source data objects must conform to certain typing -constraints, which are as follows: } -{ -\cRow{SHMEM\_INT4\_CSWAP}{\CONST{4}-byte integer.} -\cRow{SHMEM\_INT8\_CSWAP}{\CONST{8}-byte integer.} -} -\desR{The contents that had been in the \VAR{dest} data object on the remote \ac{PE} -prior to the conditional swap. Data type is the same as the \VAR{dest} data type.} -% Notes. If there are no notes, this field can be left empty. -\notesB{None.} -} -%Example -\exampleB{ - %For each example, you can enter it as an item. - \exampleITEM - {The following call ensures that the first \ac{PE} to execute the - conditional swap will successfully write its \ac{PE} number to \VAR{race\_winner} - on \ac{PE} \CONST{0}.} - {./EXAMPLES/shmem_cswap_example.c} - {} -} -\eAPI -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Performs an atomic conditional swap to a remote -% data object. 
-% -% SYNOPSIS -% C or C++: -% -% int shmem_int_cswap(int *target, int cond, int value, int pe); -% -% long shmem_long_cswap(long *target, long cond, long value, int pe); -% -% long shmem_longlong_cswap(long long *target, long long cond, long long -% value, int pe); -% -% Fortran: -% -% INTEGER pe -% -% INTEGER(KIND=4) SHMEM_INT4_CSWAP -% ires = SHMEM_INT4_CSWAP(target, cond, value, pe) -% -% INTEGER(KIND=8) SHMEM_INT8_CSWAP -% ires = SHMEM_INT8_CSWAP(target, cond, value, pe) -% -% DESCRIPTION -% -% Arguments -% -% -% OUT target The remotely accessible integer data object to be updated on -% the remote PE. If you are using C/C++, the data type of -% target should match that implied in the SYNOPSIS section. If -% you are using Fortran, it must be of the following type: -% -% Routine Data Type -% -% SHMEM_INT4_CSWAP 4-byte integer -% -% SHMEM_INT8_CSWAP 8-byte integer -% -% IN cond cond is compared to the remote target value. If cond and the -% remote target are equal, then value is swapped into the -% remote target. Otherwise, the remote target is unchanged. -% In either case, the old value of the remote target is -% returned as the function return value. cond must be of the -% same data type as target. -% -% IN value The value to be atomically written to the remote PE. value -% must be the same data type as target. -% -% IN pe An integer that indicates the PE number upon which target is -% to be updated. If you are using Fortran, it must be a -% default integer value. -% API Description -% -% The conditional swap routines conditionally update a target data object -% on an arbitrary processing element (PE) and return the prior contents -% of the data object in one atomic operation. -% -% Return Value -% -% The contents that had been in the target data object on the remote PE -% prior to the conditional swap. Data type is the same as the target data type. 
-% -% -% NOTES -% -% -% EXAMPLES -% -% The following call ensures that the first PE to execute the -% conditional swap will successfully write its PE number to race_winner -% on PE 0. -% -% \lstinputlisting[language=C]{shmem_cswap_example.c} diff --git a/_deprecated_sources/TEX_FILES/shmem_fadd.tex b/_deprecated_sources/TEX_FILES/shmem_fadd.tex deleted file mode 100644 index 919a9c386..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_fadd.tex +++ /dev/null @@ -1,113 +0,0 @@ -\bAPI{SHMEM\_FADD}{Performs an atomic fetch-and-add operation on a remote data object.} -\synC %Synopsis for C API - -int shmem_int_fadd(int *dest, int value, int pe); -long shmem_long_fadd(long *dest, long value, int pe); -long long shmem_longlong_fadd(long long *dest, long long value, int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER pe -INTEGER*4 SHMEM_INT4_FADD, ires_i4, value_i4 -ires_i4 = SHMEM_INT4_FADD(dest, value_i4, pe) -INTEGER*8 SHMEM_INT8_FADD, ires_i8, value_i8 -ires_i8 = SHMEM_INT8_FADD(dest, value_i8, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ -\argRow{OUT}{dest}{The remotely accessible integer data object to be updated on - the remote \ac{PE}. The type of \VAR{dest} should match that implied - in the SYNOPSIS section.} -\argRow{IN}{value}{The value to be atomically added to \VAR{dest}. The type of \VAR{value} should match that implied in the SYNOPSIS section.} -\argRow{IN}{pe}{An integer that indicates the \ac{PE} number on which \VAR{dest} is to be updated. If you are using \Fortran, it must be a default integer value.} -} -%API description -{ -\FUNC{shmem\_fadd} routines perform an atomic fetch-and-add operation. An -atomic fetch-and-add operation fetches the old \VAR{dest} and adds \VAR{value} to -\VAR{dest} without the possibility of another atomic operation on the \VAR{dest} -between the time of the fetch and the update. 
These routines add \VAR{value} -to \VAR{dest} on \VAR{pe} and return the previous -contents of \VAR{dest} as an atomic operation. -} -%API Description Table. -{ - \hfill \\ -\desTB {If you are using \Fortran, \VAR{dest} must be of the following type:} -{ -\cRow{SHMEM\_INT4\_FADD}{\CONST{4}-byte integer} -\cRow{SHMEM\_INT8\_FADD}{\CONST{8}-byte integer} -} - -%Return Values -\desR{The contents that had been at the \VAR{dest} address on the remote \ac{PE} prior to the atomic addition operation. The data type of the return value is the same as the \VAR{dest}.} -% Notes. If there are no notes, this field can be left empty. -\notesB{None.} -} - -\exampleB{ - \exampleITEM - {The following \FUNC{shmem\_fadd} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_fadd_example.c} - {} -} -\eAPI -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Performs an atomic fetch-and-add operation on a -% remote data object. -% -%SYNOPSIS -% -% C or C++: -% -% int shmem_int_fadd(int *target, int value, int pe); -% -% long shmem_long_fadd(long *target, long value, int pe); -% -% long long shmem_longlong_fadd(long long *target, long long value, -% int pe); -% -% Fortran: -% -% INTEGER pe -% -% INTEGER(KIND=4) SHMEM_INT4_FADD, ires, target, value -% ires = SHMEM_INT4_FADD(target, value, pe) -% -% INTEGER(KIND=8) SHMEM_INT8_FADD, ires, target, value -% ires = SHMEM_INT8_FADD(target, value, pe) -% -% -%DESCRIPTION -% -%Arguments -% -% OUT target The remotely accessible integer data object to be updated on -% the remote PE. The type of target should match that implied -% in the SYNOPSIS section. -% -% IN value The value to be atomically added to target. The type of -% value should match that implied in the SYNOPSIS section. -% -% IN pe An integer that indicates the PE number on which target is to -% be updated. If you are using Fortran, it must be a default -% integer value. -%API Description -% -% shmem_fadd functions perform an atomic fetch-and-add operation. 
An -% atomic fetch-and-add operation fetches the old target and adds value to -% target without the possibility of another atomic operation on the target -% between the time of the fetch and the update. These routines add value -% to target on Processing Element (PE) pe and return the previous -% contents of target as an atomic operation. -% -%Return Value -% -% The contents that had been at the target address on the remote PE prior -% to the atomic addition operation. The data type of return value is the same as the target. -% -% -%NOTES -% -%EXAMPLE -% -% \lstinputlisting[language=C]{shmem_fadd_example.c} diff --git a/_deprecated_sources/TEX_FILES/shmem_fence.tex b/_deprecated_sources/TEX_FILES/shmem_fence.tex deleted file mode 100644 index 4fda8b831..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_fence.tex +++ /dev/null @@ -1,43 +0,0 @@ -\bAPI{SHMEM\_FENCE}{Assures ordering of delivery of \PUT{}, \acp{AMO}, and memory store routines to symmetric data objects.} -\synC %Synopisis for C API - -void shmem_fence(void); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -CALL SHMEM_FENCE %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{None.}{}{} - } -%API description -{ -This routine assures ordering of delivery of \PUT{}, \acp{AMO}, and memory store routines to symmetric data objects. -%This function ensures ordering of \PUT{}, \acp{AMO} and store operations on symmetric data objects. -All \PUT{}, \acp{AMO}, and memory store routines to symmetric data objects issued to a particular remote \ac{PE} prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any subsequent \PUT{}, \acp{AMO}, and memory store routines to symmetric data objects to the same \ac{PE}. \FUNC{shmem\_fence} guarantees order of delivery, not completion. -% which follow the call to -% \FUNC{shmem\_fence}. -} -{ -%API Description Table. 
-\desR{ - %Return Values - None. -} -% Notes. If there are no notes, this field can be left empty. -\notesB{ - \FUNC{shmem\_fence} only provides per-\ac{PE} ordering guarantees and does not guarantee completion of delivery. There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} guarantees completion of \PUT{}, \acp{AMO}, and memory store routines to symmetric data objects which makes the updates visible to all other \acp{PE}. - - The \FUNC{shmem\_quiet} routine should be called if completion of \PUT{}, \acp{AMO}, and memory store routines to symmetric data objects is desired when multiple remote \ac{PE}s are involved. -} -} % end of DesB - -\exampleB{ - \exampleITEM - { The following \FUNC{shmem\_fence} example is for \CorCpp{} programs: } - {./EXAMPLES/shmem_fence_example.c} - {\VAR{Put1} will be ordered to be delivered before \VAR{put3} and \VAR{put2} will be ordered to be delivered before - \VAR{put4}.} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_finalize.tex b/_deprecated_sources/TEX_FILES/shmem_finalize.tex deleted file mode 100644 index 01b0f7eee..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_finalize.tex +++ /dev/null @@ -1,57 +0,0 @@ -\bAPI{SHMEM\_FINALIZE}{A collective operation that releases resources used by -the \openshmem library. This only terminates the \openshmem portion of a -program, not the entire program.} -\synC -void shmem_finalize(void); %*\synCE -\synF -CALL SHMEM_FINALIZE %*\synFE - -%DESCRIPTION - -%Arguments -\desB{ - \argRow{None.}{}{} -} -%API Description -{ - \FUNC{shmem\_finalize} is a collective operation that ends the \openshmem - portion of a program previously initialized by \FUNC{shmem\_init} and - releases resources used by the \openshmem library. This collective - operation requires all \acp{PE} to participate in the call. 
There is an - implicit global barrier in \FUNC{shmem\_finalize} so that pending - communication is completed, and no resources can be released until all - \acp{PE} have entered \FUNC{shmem\_finalize}. \FUNC{shmem\_finalize} must be - the last \openshmem library call encountered in the \openshmem portion of a - program. A call to \FUNC{shmem\_finalize} will release any resources - initialized by a corresponding call to \FUNC{shmem\_init}. All processes and - threads that represent the \acp{PE} will still exist after the call to - \FUNC{shmem\_finalize} returns, but they will no longer have access to any - resources that have been released. -} -%API Description Table -{ -%Return Values -\desR{None.} -%NOTES -\notesB{ - \FUNC{shmem\_finalize} releases all resources used by the \openshmem library - including the symmetric memory heap and pointers initiated by - \FUNC{shmem\_ptr}. This collective operation requires all \acp{PE} to - participate in the call, not just a subset of the \acp{PE}. The - non-\openshmem portion of a program may continue after a call to - \FUNC{shmem\_finalize} by all \acp{PE}. There is an implicit - \FUNC{shmem\_finalize} at the end of main, so that having an explicit call - to \FUNC{shmem\_finalize} is optional. However, an explicit - \FUNC{shmem\_finalize} may be required as an entry point for wrappers used - by profiling or other tools that need to perform their own finalization. 
-} -} -%EXAMPLES -\exampleB{ - \exampleITEM - {The following finalize example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_finalize_example.c} - {} -} - -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_finc.tex b/_deprecated_sources/TEX_FILES/shmem_finc.tex deleted file mode 100644 index ac3d16005..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_finc.tex +++ /dev/null @@ -1,57 +0,0 @@ -\bAPI{SHMEM\_FINC}{Performs an atomic fetch-and-increment operation on a remote data object.} -\synC %Synopisis for C API - -int shmem_int_finc(int *dest, int pe); -long shmem_long_finc(long *dest, int pe); -long long shmem_longlong_finc(long long *dest, int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -%\rcomment{Manju: Do we need the pe, ires4, ires8 ? } - -\synF %Synopsis for FORTRAN API - -INTEGER pe -INTEGER*4 SHMEM_INT4_FINC, ires_i4 -ires_i4 = SHMEM_INT4_FINC(dest, pe) -INTEGER*8 SHMEM_INT8_FINC, ires_i8 -ires_i8 = SHMEM_INT8_FINC(dest, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{IN}{dest}{The remotely accessible integer data object to be updated on - the remote \ac{PE}. The type of \dest{} should match that implied - in the SYNOPSIS section.} - \argRow{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to - be updated. If you are using \Fortran, it must be a default - integer value.} - } -%API description -{ - These routines perform a fetch-and-increment operation. The \dest{} on - \ac{PE} \VAR{pe} is increased by one and the routine returns - the previous contents of \dest{} as an atomic operation. -} -%API Description Table. 
-{ \hfill \\ -\desTB {If you are using \Fortran, \VAR{dest} must be of the following type:} -{ -\cRow{SHMEM\_INT4\_FINC}{\CONST{4}-byte integer} -\cRow{SHMEM\_INT8\_FINC}{\CONST{8}-byte integer} -} - %Return Values -\desR{The contents that had been at the \dest{} address on the remote \ac{PE} prior to the increment. The data type of the return value is the same as the \dest.} -% Notes. If there are no notes, this field can be left empty. -\notesB{None.} -} -%\rcomment{Manju: Seems like the indent for notes and return depends on whether -%there is a newline before (\ac{API} Description Table) or not. We might have to fix -%it} -\exampleB{ - \exampleITEM - {The following \FUNC{shmem\_finc} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_finc_example.c} - {} -} -\eAPI - - - - diff --git a/_deprecated_sources/TEX_FILES/shmem_g.tex b/_deprecated_sources/TEX_FILES/shmem_g.tex deleted file mode 100644 index 502578975..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_g.tex +++ /dev/null @@ -1,49 +0,0 @@ -\bAPI{SHMEM\_G}{Transfers one data item from a remote \ac{PE}} -\synC %Synopisis for C API - -char shmem_char_g(char *addr, int pe); -short shmem_short_g(short *addr, int pe); -int shmem_int_g(int *addr, int pe); -long shmem_long_g(long *addr, int pe); -long long shmem_longlong_g(long long *addr, int pe); -float shmem_float_g(float *addr, int pe); -double shmem_double_g(double *addr, int pe); -long double shmem_longdouble_g(long double *addr, int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{IN}{addr}{The remotely accessible array element or scalar data object.} - \argRow{IN}{pe}{The number of the remote \ac{PE} on which \VAR{addr} resides.} - } -%API description -{ - These routines provide a very low latency get capability for single elements of most basic types. -} -%This newline is required -{ -%API Description Table. 
-\desR{ - %Return Values - {Returns a single element of type specified in the synopsis.} -} -% Notes. If there are no notes, this field can be left empty. -\notesB{ None. - } -} -\exampleB{ - \exampleITEM - {The following \FUNC{shmem\_long\_g} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_g_example.c} - {} -} -\eAPI - - - - - - - - - - diff --git a/_deprecated_sources/TEX_FILES/shmem_get.tex b/_deprecated_sources/TEX_FILES/shmem_get.tex deleted file mode 100644 index 5e8cbed45..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_get.tex +++ /dev/null @@ -1,89 +0,0 @@ -\bAPI{SHMEM\_GET}{Copies data from a specified \ac{PE}.} -\synC %Synopisis for C API - -void shmem_double_get(double *dest, const double *source, size_t nelems, int pe); -void shmem_float_get(float *dest, const float *source, size_t nelems, int pe); -void shmem_get32(void *dest, const void *source, size_t nelems, int pe); -void shmem_get64(void *dest, const void *source, size_t nelems, int pe); -void shmem_get128(void *dest, const void *source, size_t nelems, int pe); -void shmem_getmem(void *dest, const void *source, size_t nelems, int pe); -void shmem_int_get(int *dest, const int *source, size_t nelems, int pe); -void shmem_long_get(long *dest, const long *source, size_t nelems, int pe); -void shmem_longdouble_get(long double *dest, const long double *source, size_t nelems, int pe); -void shmem_longlong_get(long long *dest, const long long *source, size_t nelems, int pe); -void shmem_short_get(short *dest, const short *source, size_t nelems, int pe); %*\synCE %DO NOT DELETE. 
THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER nelems, pe -CALL SHMEM_CHARACTER_GET(dest, source, nelems, pe) -CALL SHMEM_COMPLEX_GET(dest, source, nelems, pe) -CALL SHMEM_DOUBLE_GET(dest, source, nelems, pe) -CALL SHMEM_GET4(dest, source, nelems, pe) -CALL SHMEM_GET8(dest, source, nelems, pe) -CALL SHMEM_GET32(dest, source, nelems, pe) -CALL SHMEM_GET128(dest, source, nelems, pe) -CALL SHMEM_GETMEM(dest, source, nelems, pe) -CALL SHMEM_INTEGER_GET(dest, source, nelems, pe) -CALL SHMEM_LOGICAL_GET(dest, source, nelems, pe) -CALL SHMEM_REAL_GET(dest, source, nelems, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{OUT}{dest}{Local data object to be updated.} - \argRow{IN}{source}{Data object on the \ac{PE} identified by \VAR{pe} that contains the data to be copied. This data object must be remotely accessible.} - \argRow{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are using \Fortran, it must be a constant, variable, or array element of default integer type.} - \argRow{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be of type integer. If you are using \Fortran, it must be a constant, variable, or array element of default integer type.} - } -%API description -{ - The get routines provide a method for copying a - contiguous symmetric data object from a different \ac{PE} to a contiguous data object - on the local \ac{PE}. The routines return after the data has been - delivered to the \dest{} array on the local \ac{PE}. -} -%API Description Table. -{ - \desTB{The \dest{} and \source{} data objects must conform to typing constraints, - which are as follows:} - { - \cRow{shmem\_getmem}{\Fortran: Any noncharacter type. \Clang: Any data type. 
- nelems is scaled in bytes.} - \cRow{ shmem\_get4, shmem\_get32}{Any noncharacter type that has a storage - size equal to \CONST{32} bits.} - \cRow{shmem\_get8, shmem\_get64}{Any noncharacter type that has a storage - size equal to \CONST{64} bits.} - \cRow{shmem\_get128}{Any noncharacter type that has a storage - size equal to \CONST{128} bits.} - \cRow{shmem\_short\_get}{Elements of type short.} - \cRow{shmem\_int\_get}{Elements of type int.} - \cRow{shmem\_long\_get}{Elements of type long.} - \cRow{shmem\_longlong\_get}{Elements of type long long.} - \cRow{shmem\_float\_get}{Elements of type float.} - \cRow{shmem\_double\_get}{Elements of type double.} - \cRow{shmem\_longdouble\_get}{Elements of type long double.} - \cRow{SHMEM\_CHARACTER\_GET}{Elements of type character. \VAR{nelems} is the - number of characters to transfer. The - actual character lengths of the \source{} - and \dest{} variables are ignored.} - \cRow{SHMEM\_COMPLEX\_GET}{Elements of type complex of default - size.} - \cRow{SHMEM\_DOUBLE\_GET}{\Fortran: Elements of type double precision.} - \cRow{SHMEM\_INTEGER\_GET}{Elements of type integer.} - \cRow{SHMEM\_LOGICAL\_GET}{Elements of type logical.} - \cRow{SHMEM\_REAL\_GET}{Elements of type real.} - } - %Return Values - \desR{None.} - \notesB{ - See Introduction for a definition of the term remotely accessible. If you are using \Fortran, data types must be of default size. For - example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4}, or - \CONST{REAL(KIND=KIND(1.0))}. 
- } -} -\exampleB{ - \exampleITEMF - {Consider this example for \Fortran.} - {./EXAMPLES/shmem_get_example.f90} - {} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_global_exit.tex b/_deprecated_sources/TEX_FILES/shmem_global_exit.tex deleted file mode 100644 index 195c95470..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_global_exit.tex +++ /dev/null @@ -1,62 +0,0 @@ -\bAPI{SHMEM\_GLOBAL\_EXIT}{A routine that allows any \ac{PE} to force -termination of an entire program.} -%SYNOPSIS -\synC -void shmem_global_exit(int status); %*\synCE -\synF -INTEGER STATUS -CALL SHMEM_GLOBAL_EXIT(status) %*\synFE - -%DESCRIPTION - -%Arguments -\desB{ - \argRow{IN}{status}{The exit status from the main program.} -} -%API Description -{ - \FUNC{shmem\_global\_exit} is a non-collective routine that allows any one - \ac{PE} to force termination of an \openshmem program for all \acp{PE}, - passing an exit status to the execution environment. This routine terminates - the entire program, not just the \openshmem portion. When any \ac{PE} calls - \FUNC{shmem\_global\_exit}, it results in the immediate notification to all - \acp{PE} to terminate. \FUNC{shmem\_global\_exit} flushes I/O and releases - resources in accordance with C/C++/Fortran language requirements for normal - program termination. If more than one \ac{PE} calls - \FUNC{shmem\_global\_exit}, then the exit status returned to the environment - shall be one of the values passed to \FUNC{shmem\_global\_exit} as the - status argument. There is no return to the caller of - \FUNC{shmem\_global\_exit}; control is returned from the \openshmem program - to the execution environment for all \acp{PE}. -} -%API Description Table -{ -%Return Value -\desR{ -None.} -%NOTES -\notesB{ - \FUNC{shmem\_global\_exit} may be used in situations where one or more - \acp{PE} have determined that the program has completed and/or should - terminate early. 
Accordingly, the integer status argument can be used to - pass any information about the nature of the exit, e.g., an encountered error - or a found solution. Since \FUNC{shmem\_global\_exit} is a non-collective - routine, there is no implied synchronization, and all \acp{PE} must - terminate regardless of their current execution state. While I/O must be - flushed for standard language I/O calls from C/C++/Fortran, it is - implementation dependent as to how I/O done by other means (e.g. third - party I/O libraries) is handled. Similarly, resources are released - according to C/C++/Fortran standard language requirements, but this may not - include all resources allocated for the \openshmem program. However, a - quality implementation will make a best effort to flush all I/O and clean - up all resources. -} -} -%EXAMPLES -\exampleB{ -\exampleITEM - {} - {./EXAMPLES/shmem_global_exit_example.c} - {} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_iget.tex b/_deprecated_sources/TEX_FILES/shmem_iget.tex deleted file mode 100644 index c098596d5..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_iget.tex +++ /dev/null @@ -1,90 +0,0 @@ -\bAPI{SHMEM\_IGET}{Copies strided data from a specified \ac{PE}.} -\label{subsec:shmem_iget} -\synC %Synopsis for C API - -void shmem_double_iget(double *dest, const double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_float_iget(float *dest, const float *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_iget32(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_iget64(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_iget128(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_int_iget(int *dest, const int *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_long_iget(long *dest, const long *source, ptrdiff_t dst, 
ptrdiff_t sst, size_t nelems, int pe); -void shmem_longdouble_iget(long double *dest, const long double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_longlong_iget(long long *dest, const long long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_short_iget(short *dest, const short *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER dst, sst, nelems, pe -CALL SHMEM_COMPLEX_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_DOUBLE_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET4(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET8(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET32(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET64(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET128(dest, source, dst, sst, nelems, pe) -CALL SHMEM_INTEGER_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_LOGICAL_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_REAL_IGET(dest, source, dst, sst, nelems, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{OUT}{dest}{Array to be updated on the local \ac{PE}. } - \argRow{IN}{source}{Array containing the data to be copied on the remote \ac{PE}.} - \argRow{IN}{dst}{The stride between consecutive elements of the \dest{} - array. The stride is scaled by the element size of the \dest{} array. - A value of \CONST{1} indicates contiguous data. \VAR{dst} must be of type - \textit{ptrdiff\_t}. If you are calling from \Fortran, it must be a default integer value.} - \argRow{IN}{sst}{The stride between consecutive elements of the \source{} array. - The stride is scaled by the element size of the \source{} array. - A value of \CONST{1} indicates contiguous data. \VAR{sst} must be of type - \textit{ptrdiff\_t}. 
If you are calling from \Fortran, it must be a default integer value.} - \argRow{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must - be of type \VAR{size\_t} for \Clang. If you are using \Fortran, it must be a - constant, variable, or array element of default integer type.} - \argRow{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be of type integer. If you are using \Fortran, it must be a constant, variable, or - array element of default integer type.} -} -%API description -{ - The \FUNC{iget} routines provide a method for copying strided data elements from a - symmetric array from a specified remote \ac{PE} to strided locations on a local array. - The routines return when the data has been copied into the local \VAR{dest} - array.} -%This newline is required -%API Description Table. -{ -\hfill \\ - \desTB{The \VAR{dest} and \VAR{source} data objects must conform to typing constraints, which are as follows:} - { - \cRow{shmem\_iget32, shmem\_iget4}{Any noncharacter type that has a storage - size equal to \CONST{32} bits.} - \cRow{shmem\_iget64, shmem\_iget8}{Any noncharacter type that has a storage - size equal to \CONST{64} bits.} - \cRow{shmem\_iget128}{Any noncharacter type that has a storage - size equal to \CONST{128} bits.} - \cRow{shmem\_short\_iget}{Elements of type short.} - \cRow{ shmem\_int\_iget}{Elements of type int.} - \cRow{shmem\_long\_iget}{Elements of type long.} - \cRow{shmem\_longlong\_iget}{Elements of type long long.} - \cRow{shmem\_float\_iget}{Elements of type float.} - \cRow{shmem\_double\_iget}{Elements of type double.} - \cRow{shmem\_longdouble\_iget}{Elements of type long double.} - \cRow{SHMEM\_COMPLEX\_IGET}{Elements of type complex of default size.} - \cRow{SHMEM\_DOUBLE\_IGET}{\Fortran: Elements of type double precision.} - \cRow{SHMEM\_INTEGER\_IGET}{Elements of type integer.} - \cRow{SHMEM\_LOGICAL\_IGET}{Elements of type logical.} - \cRow{SHMEM\_REAL\_IGET}{Elements of type real.} - 
} - %Return Values - \desR{None.} - \notesB{If you are using \Fortran, data types must be of default size. For - example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4}, or \CONST{REAL(KIND=KIND(1.0))}.} -} -\exampleB{ - \exampleITEMF - {The following example uses \FUNC{shmem\_logical\_iget} in a \Fortran{} - program.} - {./EXAMPLES/shmem_iget_example.f90} - {} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_inc.tex b/_deprecated_sources/TEX_FILES/shmem_inc.tex deleted file mode 100644 index 551dd6dde..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_inc.tex +++ /dev/null @@ -1,52 +0,0 @@ -\bAPI{SHMEM\_INC}{Performs an atomic increment operation on a remote data object.} -\synC %Synopisis for C API - -void shmem_int_inc(int *dest, int pe); -void shmem_long_inc(long *dest, int pe); -void shmem_longlong_inc(long long *dest, int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -INTEGER pe -CALL SHMEM_INT4_INC(dest, pe) -CALL SHMEM_INT8_INC(dest, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{IN}{dest}{The remotely accessible integer data object to be updated on - the remote \ac{PE}. The type of \dest{} should match that implied - in the SYNOPSIS section.} - \argRow{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to - be updated. If you are using \Fortran{}, it must be a default - integer value.} - } -%API description -{ - These routines perform an atomic increment operation on the \VAR{dest} data object on \ac{PE}. -} -{ -%API Description Table. - \hfill \\ -\desTB {If you are using \Fortran, \VAR{dest} must be of the following type:} -{ -\cRow{SHMEM\_INT4\_INC}{\CONST{4}-byte integer} -\cRow{SHMEM\_INT8\_INC}{\CONST{8}-byte integer} -} - -\desR{ - %Return Values - None. -} -% Notes. If there are no notes, this field can be left empty. 
-\notesB{ - The term remotely accessible is defined in the Introduction. -} -} % end of DesB - -\exampleB{ - \exampleITEM - { The following \FUNC{shmem\_int\_inc} example is for \CorCpp{} programs: } - {./EXAMPLES/shmem_inc_example.c} - {} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_init.tex b/_deprecated_sources/TEX_FILES/shmem_init.tex deleted file mode 100644 index d35e042da..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_init.tex +++ /dev/null @@ -1,47 +0,0 @@ -\bAPI{SHMEM\_INIT}{A collective operation that allocates and initializes the -resources used by the \openshmem library.} -\synC -void shmem_init(void); %*\synCE -\synF -CALL SHMEM_INIT() %*\synFE - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{None.}{}{} -} -%API description -{ - \FUNC{shmem\_init} allocates and initializes resources used by the - \openshmem library. It is a collective operation that all \acp{PE} must - call before any other \openshmem routine may be called. At the end of the - \openshmem program which it initialized, the call to \FUNC{shmem\_init} - must be matched with a call to \FUNC{shmem\_finalize}. After a single - call to \FUNC{shmem\_init}, a subsequent call to \FUNC{shmem\_init} - in the same program results in undefined behavior. -} -%API Description Table. -{ -%Return Values -\desR{ None. } -\notesB{ - As of \openshmem Specification 1.2 the use of \FUNC{start\_pes} has been - deprecated and is replaced with \FUNC{shmem\_init}. While support for - \FUNC{start\_pes} is still required in \openshmem libraries, users are - encouraged to use \FUNC{shmem\_init}. Replacing \FUNC{start\_pes} with - \FUNC{shmem\_init} in \openshmem programs with no further changes is - possible; there is an implicit \FUNC{shmem\_finalize} at the end of main. 
- However, \FUNC{shmem\_init} differs slightly from \FUNC{start\_pes}: - multiple calls to \FUNC{shmem\_init} within a program result in undefined - behavior, while in the case of \FUNC{start\_pes}, any subsequent calls to - \FUNC{start\_pes} after the first one resulted in a no-op. -} -}%end of DesB -%Example -\exampleB{ -%For each example, you can enter it as an item. - \exampleITEMF - { This is a simple program that calls \FUNC{shmem\_init}:} - {./EXAMPLES/shmem_init_example.f90} - {} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_iput.tex b/_deprecated_sources/TEX_FILES/shmem_iput.tex deleted file mode 100644 index 9fb1704fc..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_iput.tex +++ /dev/null @@ -1,86 +0,0 @@ -\bAPI{SHMEM\_IPUT}{Copies strided data to a specified \ac{PE}.} -\label{subsec:shmem_iput} -\synC -void shmem_double_iput(double *dest, const double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_float_iput(float *dest, const float *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_int_iput(int *dest, const int *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_iput32(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_iput64(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_iput128(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_long_iput(long *dest, const long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_longdouble_iput(long double *dest, const long double *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_longlong_iput(long long *dest, const long long *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); -void shmem_short_iput(short *dest, const short *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); %*\synCE %DO NOT DELETE.
THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER dst, sst, nelems, pe -CALL SHMEM_COMPLEX_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_DOUBLE_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_INTEGER_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT4(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT8(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT32(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT64(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT128(dest, source, dst, sst, nelems, pe) -CALL SHMEM_LOGICAL_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_REAL_IPUT(dest, source, dst, sst, nelems, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\desB{ -\argRow{OUT}{dest}{Array to be updated on the remote \ac{PE}. This data object must - be remotely accessible.} -\argRow{IN}{source}{Array containing the data to be copied.} -\argRow{IN}{dst}{The stride between consecutive elements of the \dest{} array. - The stride is scaled by the element size of the \dest{} array. - A value of \CONST{1} indicates contiguous data. \VAR{dst} must be of type - \textit{ptrdiff\_t}. If you are using \Fortran, it must be a default integer value.} -\argRow{IN}{sst}{The stride between consecutive elements of the \source{} array. - The stride is scaled by the element size of the \source{} array. - A value of \CONST{1} indicates contiguous data. \VAR{sst} must be of type - \textit{ptrdiff\_t}. If you are using \Fortran, it must be a default - integer value.} -\argRow{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays. \VAR{nelems} must - be of type \VAR{size\_t} for \Clang. If you are using \Fortran, it must be a - constant, variable, or array element of default integer type.} -\argRow{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be of type integer. 
If you are using \Fortran, it must be a constant, variable, or - array element of default integer type.} -} -{ - - The \FUNC{iput} routines provide a method for copying strided data elements (specified by \VAR{sst}) of an array from a \source{} array on the local \ac{PE} to locations specified by stride \VAR{dst} on a \dest{} array on a specified remote \ac{PE}. Both strides, \VAR{dst} and \VAR{sst}, must be greater than or equal to \CONST{1}. The routines return when the data has been copied out of the \VAR{source} array on the local \ac{PE} but not necessarily before the data has been delivered to the remote data object. -}{ - \desTB{ - The \dest{} and \source{} data objects must conform to typing constraints, - which are as follows:} - { - \cRow{shmem\_iput32, shmem\_iput4}{Any noncharacter type that has a storage - size equal to \CONST{32} bits.} - \cRow{shmem\_iput64, shmem\_iput8}{Any noncharacter type that has a storage - size equal to \CONST{64} bits.} - \cRow{shmem\_iput128}{Any noncharacter type that has a storage - size equal to \CONST{128} bits.} - \cRow{shmem\_short\_iput}{Elements of type short.} - \cRow{shmem\_int\_iput}{Elements of type int.} - \cRow{shmem\_long\_iput}{Elements of type long.} - \cRow{shmem\_longlong\_iput}{Elements of type long long.} - \cRow{shmem\_float\_iput}{Elements of type float.} - \cRow{shmem\_double\_iput}{Elements of type double.} - \cRow{shmem\_longdouble\_iput}{Elements of type long double.} - \cRow{SHMEM\_COMPLEX\_IPUT}{Elements of type complex of default size.} - \cRow{SHMEM\_DOUBLE\_IPUT}{Elements of type double precision.} - \cRow{SHMEM\_INTEGER\_IPUT}{Elements of type integer.} - \cRow{SHMEM\_LOGICAL\_IPUT}{Elements of type logical.} - \cRow{SHMEM\_REAL\_IPUT}{Elements of type real.} - } -\desR{None.} -\notesB{ - If you are using \Fortran, data types must be of default size. For - example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4} or - \CONST{REAL(KIND=KIND(1.0))}.
See Introduction for a definition of the term remotely accessible. -} -} -\exampleB{ - \exampleITEM - {Consider the following \FUNC{shmem\_short\_iput} example for \CorCpp{} - programs.} {./EXAMPLES/shmem_iput_example.c}{} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_lock.tex b/_deprecated_sources/TEX_FILES/shmem_lock.tex deleted file mode 100644 index 940009f36..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_lock.tex +++ /dev/null @@ -1,41 +0,0 @@ - \bAPI{SHMEM\_LOCK}{Releases, locks, and tests a mutual exclusion memory lock.} -\synC -void shmem_clear_lock(long *lock); -void shmem_set_lock(long *lock); -int shmem_test_lock(long *lock); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -INTEGER lock, SHMEM_TEST_LOCK -CALL SHMEM_CLEAR_LOCK(lock) -CALL SHMEM_SET_LOCK(lock) -I = SHMEM_TEST_LOCK(lock) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\desB{ - \argRow{IN}{lock}{ A symmetric data object that is a scalar variable or an array - of length \CONST{1}. This data object must be set to \CONST{0} on all - \ac{PE}s prior to the first use. \VAR{lock} must be of type \CONST{long}. If you are using \Fortran, it must be of default kind.} -} -{ - The \FUNC{shmem\_set\_lock} routine sets a mutual exclusion lock after waiting - for the lock to be freed by any other \ac{PE} currently holding the lock. - Waiting \ac{PE}s are assured of getting the lock in a first-come, - first-served manner. The \FUNC{shmem\_clear\_lock} routine releases a lock previously set by \FUNC{shmem\_set\_lock} after ensuring that all local and remote stores initiated in the critical region are complete. The \FUNC{shmem\_test\_lock} routine sets a mutual exclusion lock only if it is currently cleared. By using this routine, a \ac{PE} can avoid blocking on a set lock. If the lock is currently set, the routine returns without waiting. 
These routines are appropriate for protecting a critical region from simultaneous update by multiple \ac{PE}s. -} -{ -\desR{ - The \FUNC{shmem\_test\_lock} routine returns \CONST{0} if the lock was originally cleared and this call was able to set the lock. A value of \CONST{1} is - returned if the lock had been set and the call returned without waiting - to set the lock.} -\notesB{ - The term symmetric data object is defined in the Introduction. The lock variable should always be initialized to zero and accessed only by the \openshmem locking \ac{API}. - Changing the value of the lock variable by other means without using the \openshmem \ac{API} can lead to undefined behavior. -% Section 41, there was discussion on the list about putting in language about the opacity of the lock variable after the routines have touched it. Initialize to zero, then only the API should be allowed to use it, cannot %guarantee any value meaningful to the programmer and any reset could lead to bad things. Do we want to tighten this up in this version? (e.g. from Brian Barrett) -} -} - -\exampleB { -\exampleITEM - {The following example uses \FUNC{shmem\_lock} in a \Clang{} program.} - {./EXAMPLES/shmem_lock_example.c}{} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_name.tex b/_deprecated_sources/TEX_FILES/shmem_name.tex deleted file mode 100644 index 470678189..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_name.tex +++ /dev/null @@ -1,40 +0,0 @@ -\bAPI{SHMEM\_INFO\_GET\_NAME}{This routine returns the vendor defined character string.} -\synC %Synopisis for C API - -void shmem_info_get_name(char *name); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -SHMEM_INFO_GET_NAME(NAME) -CHARACTER *(*)NAME %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table.
If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{OUT}{name}{The vendor defined string.} - } -%API description -{ - This routine returns the vendor defined character string of size defined by the - constant SHMEM\_MAX\_NAME\_LEN. The program calling this function - prepares the memory of size SHMEM\_MAX\_NAME\_LEN, and the implementation copies the - string of size at most SHMEM\_MAX\_NAME\_LEN. In C, the string is terminated by a null character. - In Fortran, the string of size less than SHMEM\_MAX\_NAME\_LEN is padded with blank characters up to - size SHMEM\_MAX\_NAME\_LEN. The implementation copying a string of size greater than SHMEM\_MAX\_NAME\_LEN results - in undefined behavior. - Multiple invocations of the - routine in an \openshmem{} program always return the same string. - For a given library implementation, the major and minor version returned by these calls is consistent with the compile-time constants defined in its shmem.h. -} -%This newline is required -{ -%API Description Table. -\desR{ - %Return Values - None. -} -% Notes. If there are no notes, this field can be left empty. -\notesB{ None. - } -} -\eAPI - diff --git a/_deprecated_sources/TEX_FILES/shmem_p.tex b/_deprecated_sources/TEX_FILES/shmem_p.tex deleted file mode 100644 index 2fdfb55f7..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_p.tex +++ /dev/null @@ -1,33 +0,0 @@ -\bAPI{SHMEM\_P}{Copies one data item to a remote \ac{PE}.} -\synC -void shmem_char_p(char *addr, char value, int pe); -void shmem_short_p(short *addr, short value, int pe); -void shmem_int_p(int *addr, int value, int pe); -void shmem_long_p(long *addr, long value, int pe); -void shmem_longlong_p(long long *addr, long long value, int pe); -void shmem_float_p(float *addr, float value, int pe); -void shmem_double_p(double *addr, double value, int pe); -void shmem_longdouble_p(long double *addr, long double value, int pe); %*\synCE %DO NOT DELETE.
THIS LINE IS NOT A COMMENT - -\desB{ - \argRow{IN}{addr}{The remotely accessible array element or scalar data object - which will receive the data on the remote \ac{PE}.} - \argRow{IN}{value}{The value to be transferred to \VAR{addr} on the remote \ac{PE}.} - \argRow{IN}{pe}{The number of the remote \ac{PE}.} -} -{ These routines provide a very low latency put capability for single - elements of most basic types. - - As with \FUNC{shmem\_put}, these routines start the remote transfer and may - return before the data is delivered to the remote \ac{PE}. Use - \FUNC{shmem\_quiet} to force completion of all remote \PUT{} transfers. -} -{ -\desR{None.} -\notesB{None.} -} -\exampleB { -\exampleITEM{The following example uses \FUNC{shmem\_double\_p} in a \Clang{} program.} - {./EXAMPLES/shmem_p_example.c}{} - } -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_pe_accessible.tex b/_deprecated_sources/TEX_FILES/shmem_pe_accessible.tex deleted file mode 100644 index 9ac497783..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_pe_accessible.tex +++ /dev/null @@ -1,29 +0,0 @@ -\bAPI{SHMEM\_PE\_ACCESSIBLE}{Determines whether a \ac{PE} is accessible via \openshmem's data transfer routines.} -\synC -int shmem_pe_accessible(int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF -LOGICAL LOG, SHMEM_PE_ACCESSIBLE -INTEGER pe -LOG = SHMEM_PE_ACCESSIBLE(pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\desB{ -\argRow{IN}{pe}{Specific \ac{PE} to be checked for accessibility from the local \ac{PE}.} -} -{ - \FUNC{shmem\_pe\_accessible} is a query routine that indicates whether a - specified \ac{PE} is accessible via \openshmem from the local \ac{PE}. 
The \FUNC{shmem\_pe\_accessible} routine returns \CONST{TRUE} only if the remote \ac{PE} is a process running from the same executable file as the local \ac{PE}, indicating that full \openshmem support for symmetric data objects (that reside in the static memory and symmetric heap) is available; otherwise, it returns \CONST{FALSE}. This routine may be particularly useful for hybrid programming with other communication libraries (such as \ac{MPI}) or parallel languages. For example, on SGI Altix series systems, \openshmem is supported across multiple partitioned hosts and InfiniBand connected hosts. When running multiple executable MPI programs using \openshmem on an Altix, full \openshmem support is available between processes running from the same executable file. However, \openshmem support between processes of different executable files is supported only for data objects on the symmetric heap, since static data objects are not symmetric between different executable files. -% The \FUNC{shmem\_pe\_accessible} function on Altix returns -% TRUE only if the remote \ac{PE} is a process running from the same -% executable file as the local \ac{PE}, indicating that full \openshmem support -% (static memory and symmetric heap) is available. -} -{ -\desR{\Clang: The return value is 1 if the specified \ac{PE} is a valid remote - \ac{PE} for \openshmem routines; otherwise, it is 0. \\ \\ - \Fortran: The return value is \CONST{.TRUE.} if the specified \ac{PE} is a valid - remote \ac{PE} for \openshmem routines; otherwise, it is \CONST{.FALSE.}. - } -\notesB{ None. } -} - -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_ptr.tex b/_deprecated_sources/TEX_FILES/shmem_ptr.tex deleted file mode 100644 index 313c273b8..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_ptr.tex +++ /dev/null @@ -1,45 +0,0 @@ -\bAPI{SHMEM\_PTR}{Returns a pointer to a data object on a specified - \ac{PE}.} -\synC -void *shmem_ptr(void *dest, int pe); %*\synCE %DO NOT DELETE.
THIS LINE IS NOT A COMMENT -\synF -POINTER (PTR, POINTEE) -INTEGER pe -PTR = SHMEM_PTR(dest, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\desB{ -\argRow{IN}{dest}{The symmetric data object to be referenced.} -\argRow{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to - be accessed. If you are using \Fortran, it must be a default - integer value.} -} -{ - \FUNC{shmem\_ptr} returns an address that may be used to directly reference - \dest{} on the specified \ac{PE}. This address can be assigned to a pointer. - After that, ordinary loads and stores to this remote address may be performed. - - When a sequence of loads (gets) and stores (puts) to a data object on a - remote \ac{PE} does not match the access pattern provided in an \openshmem data - transfer routine like \FUNC{shmem\_put32} or \FUNC{shmem\_real\_iget}, the - \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish the - communication. -} -{ - \desR{ - The return value is a non-NULL address of the \dest{} data object when it is - accessible using memory loads and stores in addition to \openshmem{} operations. - Otherwise, a NULL address is returned. -} -\notesB{ - When calling \FUNC{shmem\_ptr}, \dest{} is the address of - the referenced symmetric data object on the calling \ac{PE}. 
- } -} - -\exampleB{ - \exampleITEM{This \Fortran{} program calls \FUNC{shmem\_ptr} and then \ac{PE} 0 writes to the \VAR{BIGD} - array on \ac{PE} 1:}{./EXAMPLES/shmem_ptr_example.f90}{} - \exampleITEM{This is the equivalent program written in \Clang:} - {./EXAMPLES/shmem_ptr_example.c}{}} - -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_put.tex b/_deprecated_sources/TEX_FILES/shmem_put.tex deleted file mode 100755 index 8341c63c9..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_put.tex +++ /dev/null @@ -1,87 +0,0 @@ -\bAPI{SHMEM\_PUT}{The put routines provide a method for copying data from a contiguous local data object to a data object on a specified \ac{PE}.} -\synC %Synopisis for C API - -void shmem_double_put(double *dest, const double *source, size_t nelems, int pe); -void shmem_float_put(float *dest, const float *source, size_t nelems, int pe); -void shmem_int_put(int *dest, const int *source, size_t nelems, int pe); -void shmem_long_put(long *dest, const long *source, size_t nelems, int pe); -void shmem_longdouble_put(long double *dest, const long double *source, size_t nelems, int pe); -void shmem_longlong_put(long long *dest, const long long *source, size_t nelems, int pe); -void shmem_put32(void *dest, const void *source, size_t nelems, int pe); -void shmem_put64(void *dest, const void *source, size_t nelems, int pe); -void shmem_put128(void *dest, const void *source, size_t nelems, int pe); -void shmem_putmem(void *dest, const void *source, size_t nelems, int pe); -void shmem_short_put(short*dest, const short*source, size_t nelems, int pe); %*\synCE %DO NOT DELETE.
THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -CALL SHMEM_CHARACTER_PUT(dest, source, nelems, pe) -CALL SHMEM_COMPLEX_PUT(dest, source, nelems, pe) -CALL SHMEM_DOUBLE_PUT(dest, source, nelems, pe) -CALL SHMEM_INTEGER_PUT(dest, source, nelems, pe) -CALL SHMEM_LOGICAL_PUT(dest, source, nelems, pe) -CALL SHMEM_PUT4(dest, source, nelems, pe) -CALL SHMEM_PUT8(dest, source, nelems, pe) -CALL SHMEM_PUT32(dest, source, nelems, pe) -CALL SHMEM_PUT64(dest, source, nelems, pe) -CALL SHMEM_PUT128(dest, source, nelems, pe) -CALL SHMEM_PUTMEM(dest, source, nelems, pe) -CALL SHMEM_REAL_PUT(dest, source, nelems, pe) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} - \argRow{IN}{source}{Data object containing the data to be copied.} - \argRow{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are using \Fortran, it must be a constant, variable, or array element of default integer type.} - \argRow{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be of type integer. If you are using \Fortran, it must be a constant, variable, or array element of default integer type.} - } - %API description - { The routines return after the data has been copied out of the \source{} - array on the local \ac{PE}. - The delivery of data words into the data object on the destination \ac{PE} - may occur in any order. Furthermore, two successive put routines - may deliver data out of order unless a call to \FUNC{shmem\_fence} is - introduced between the two calls. - } - %API Description Table.
-{ - % If there is no Description Table and return, this field can be - \hfill \\ - \desTB { - The \dest{} and \source{} data objects must conform to certain typing - constraints, which are as follows: } - { - \cRow{shmem\_putmem}{ \Fortran: Any noncharacter type. \Clang: Any - data type. \VAR{nelems} is scaled in bytes.} - \cRow{shmem\_put4, shmem\_put32}{Any noncharacter type that has a storage size equal to \CONST{32} bits. } - \cRow{shmem\_put8, shmem\_put64}{Any noncharacter type that has a storage size equal to \CONST{64} bits. } - \cRow{shmem\_put128}{Any noncharacter type that has a storage size equal to \CONST{128} bits. } - \cRow{shmem\_double\_put}{Elements of type double.} - \cRow{shmem\_longdouble\_put}{Elements of type long double.} - \cRow{SHMEM\_CHARACTER\_PUT}{Elements of type character. \VAR{nelems} is the number of characters to transfer. The actual character lengths of the \source{} and \dest{} variables are ignored. } - \cRow{SHMEM\_COMPLEX\_PUT}{Elements of type complex of default size.} - \cRow{SHMEM\_DOUBLE\_PUT}{Elements of type double precision. } - \cRow{SHMEM\_INTEGER\_PUT}{Elements of type integer.} - \cRow{SHMEM\_LOGICAL\_PUT}{Elements of type logical.} - \cRow{SHMEM\_REAL\_PUT}{Elements of type real.} - } - %Return Values -\desR{None.} -% Notes. If there are no notes, this field can be left empty. -\notesB{ If you are using \Fortran, data types must be of default size. For - example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4}, or - \CONST{REAL(KIND=KIND(1.0))}. The Fortran API routine \FUNC{SHMEM\_PUT} has - been deprecated, and either \FUNC{SHMEM\_PUT8} or \FUNC{SHMEM\_PUT64} should be used - in its place. -} -}%end of DesB -%Example -\exampleB{ -%For each example, you can enter it as an item. 
- \exampleITEM - { The following \FUNC{shmem\_put} example is for \CorCpp{} programs:} - {./EXAMPLES/shmem_put_example.c} - {} - } -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_quiet.tex b/_deprecated_sources/TEX_FILES/shmem_quiet.tex deleted file mode 100644 index 2483766d0..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_quiet.tex +++ /dev/null @@ -1,45 +0,0 @@ -\bAPI{SHMEM\_QUIET}{Waits for completion of all outstanding \PUT{}, \acp{AMO} and memory store routines to symmetric data objects issued by a \ac{PE}.} -\synC %Synopisis for C API - -void shmem_quiet(void); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -CALL SHMEM_QUIET %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{None.}{}{} -} -%API description - { - The \FUNC{shmem\_quiet} routine ensures completion of \PUT{}, \acp{AMO}, and memory store routines on symmetric data objects issued by the calling \ac{PE}. All \PUT{}, \acp{AMO}, memory store routines to symmetric data objects are guaranteed to be completed and visible to all \ac{PE}s when \FUNC{shmem\_quiet} returns. - %This also applies to all store operations to symmetric data issued by the calling \ac{PE}. - %SP: Removing confusing parts as according to SGI they are complete at the end of quiet. - %no later than any subsequent memory load or - %store, \PUT{} or \GET{}, \acp{AMO}, or synchronization operations that follow the call to \FUNC{shmem\_quiet}. -} - %API Description Table. -{ - %Return Values -\desR{None.} -% Notes. If there are no notes, this field can be left empty. -\notesB{ - \FUNC{shmem\_quiet} is most useful as a way of ensuring completion of - several \PUT{}, \acp{AMO}, and memory store routines to symmetric data objects initiated by the calling \ac{PE}. 
For example, you might use \FUNC{shmem\_quiet} to await delivery of a block of data before issuing another \PUT{}, which sets a completion flag on another \ac{PE}. - - \FUNC{shmem\_quiet} is not usually needed if \FUNC{shmem\_barrier\_all} or - \FUNC{shmem\_barrier} are called. The barrier routines wait for the - completion of outstanding writes (\PUT{}, \ac{AMO}, memory stores) to symmetric data objects on all \acp{PE}. -} -} -%Example -\exampleB{ -%For each example, you can enter it as an item. - \exampleITEM - {The following example uses \FUNC{shmem\_quiet} in a \CorCpp{} program: } - {./EXAMPLES/shmem_quiet_example.c} - {\VAR{Put1} and \VAR{put2} will be completed and visible before \VAR{put3} and \VAR{put4}.} -} -\eAPI - diff --git a/_deprecated_sources/TEX_FILES/shmem_reductions.tex b/_deprecated_sources/TEX_FILES/shmem_reductions.tex deleted file mode 100644 index 604178158..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_reductions.tex +++ /dev/null @@ -1,302 +0,0 @@ -\bAPI{SHMEM\_REDUCTIONS}{Performs arithmetic and logical operations across a set of \ac{PE}s.} - -\textbf{AND} \newline -Performs a bitwise AND function across a set of processing elements (\ac{PE}s).\newline -\synC %Synopisis for C API - -void shmem_int_and_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_and_to_all(long *dest, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longlong_and_to_all(long long *dest, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_and_to_all(short *dest, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -void shmem_int_and_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -%*\synCE %DO NOT DELETE. 
THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -CALL SHMEM_INT4_AND_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_AND_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -%*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\bigskip -\textbf{MAX} \newline -Performs a maximum function reduction across a set of processing elements (\ac{PE}s).\newline -\synC %Synopisis for C API - -void shmem_double_max_to_all(double *dest, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_max_to_all(float *dest, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_max_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_max_to_all(long *dest, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longdouble_max_to_all(long double *dest, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_max_to_all(long long *dest, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_max_to_all(short *dest, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -%*\synCE %DO NOT DELETE. 
THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -CALL SHMEM_INT4_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -%*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\bigskip -\textbf{MIN} \newline -Performs a minimum function reduction across a set of processing elements (\ac{PE}s).\newline -\synC %Synopisis for C API - -void shmem_double_min_to_all(double *dest, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_min_to_all(float *dest, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_min_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_min_to_all(long *dest, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longdouble_min_to_all(long double *dest, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_min_to_all(long long *dest, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_min_to_all(short *dest, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -%*\synCE %DO NOT DELETE. 
THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -CALL SHMEM_INT4_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -%*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\bigskip -\textbf{SUM} \newline -Performs a sum reduction across a set of processing elements (\ac{PE}s).\newline -\synC %Synopisis for C API - -void shmem_complexd_sum_to_all(double complex *dest, double complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double complex *pWrk, long *pSync); -void shmem_complexf_sum_to_all(float complex *dest, float complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float complex *pWrk, long *pSync); -void shmem_double_sum_to_all(double *dest, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_sum_to_all(float *dest, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_sum_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_sum_to_all(long *dest, long *source, int nreduce, int PE_start, int logPE_stride,int PE_size, long *pWrk, long *pSync); -void shmem_longdouble_sum_to_all(long double *dest, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_sum_to_all(long long *dest, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void 
shmem_short_sum_to_all(short *dest, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -%*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -CALL SHMEM_COMP4_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_COMP8_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT4_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -%*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\bigskip -\textbf{PROD} \newline -Performs a product reduction across a set of processing elements (\ac{PE}s).\newline -\synC %Synopisis for C API - -void shmem_complexd_prod_to_all(double complex *dest, double complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double complex *pWrk, long *pSync); -void shmem_complexf_prod_to_all(float complex *dest, float complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float complex *pWrk, long *pSync); -void shmem_double_prod_to_all(double *dest, double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_prod_to_all(float *dest, float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_prod_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_prod_to_all(long *dest, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long 
*pWrk, long *pSync); -void shmem_longdouble_prod_to_all(long double *dest, long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_prod_to_all(long long *dest, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_prod_to_all(short *dest, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -%*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -CALL SHMEM_COMP4_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_COMP8_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT4_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -%*\synFE %DO NOT DELETE. 
THIS LINE IS NOT A COMMENT - -\bigskip -\textbf{OR} \newline -Performs a bitwise OR function reduction across a set of processing elements (\ac{PE}s).\newline -\synC %Synopisis for C API - -void shmem_int_or_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_or_to_all(long *dest, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longlong_or_to_all(long long *dest, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_or_to_all(short *dest, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); -%*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -CALL SHMEM_INT4_OR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_OR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -%*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\bigskip -\textbf{XOR}\newline -Performs a bitwise EXCLUSIVE OR reduction across a set of processing elements (\ac{PE}s).\newline -\synC %Synopisis for C API - -void shmem_int_xor_to_all(int *dest, int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_xor_to_all(long *dest, long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longlong_xor_to_all(long long *dest, long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_xor_to_all(short *dest, short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); %*\synCE %DO NOT DELETE. 
THIS LINE IS NOT A COMMENT -\synF %Synopsis for FORTRAN API - -CALL SHMEM_INT4_XOR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_XOR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{IN}{dest}{A symmetric array, of length \VAR{nreduce} elements, to receive the result of the reduction routines. The data type of \dest{} varies with the version of the reduction routine being called. When calling from \CorCpp, refer to the SYNOPSIS section for data type information.} - \argRow{IN}{source}{ A symmetric array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The \source{} argument must have the same data type as \dest.} - \argRow{IN}{\VAR{nreduce}}{The number of elements in the \dest{} and \source{} arrays. \VAR{nreduce} must be of type integer. If you are using \Fortran, it must be a default integer value.} - \argRow{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of \ac{PE}s. \VAR{PE\_start} must be of type integer. If you are using \Fortran, it must be a default integer value.} - \argRow{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of type integer. If you are using \Fortran, it must be a default integer value.} - \argRow{IN}{PE\_size}{The number of \ac{PE}s in the \activeset. \VAR{PE\_size} must be of type integer. If you are using \Fortran, it must be a default integer value.} - \argRow{IN}{pWrk}{A symmetric work array. The \VAR{pWrk} argument must have the same data type as \dest. In \CorCpp, this contains max(\VAR{nreduce}/2 + 1, \CONST{\_SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}) elements. 
In \Fortran, this contains max(\VAR{nreduce}/2 + 1, \CONST{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}) elements.} - \argRow{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must be of type long and size \CONST{\_SHMEM\_REDUCE\_SYNC\_SIZE}. In \Fortran, \VAR{pSync} must be of type integer and size \CONST{SHMEM\_REDUCE\_SYNC\_SIZE}. If you are using \Fortran, it must be a default integer value. Every element of this array must be initialized with the value \CONST{\_SHMEM\_SYNC\_VALUE} (in \CorCpp) or \CONST{SHMEM\_SYNC\_VALUE} (in \Fortran) before any of the \ac{PE}s in the \activeset{} enter the reduction routine.} - } - %API description - { - \openshmem reduction routines compute one or more - reductions across symmetric arrays on multiple \acp{PE}. A - reduction performs an associative binary routine across a set of - values. - - The \VAR{nreduce} argument determines the number of separate reductions to - perform. The \source{} array on all \ac{PE}s in the \activeset{} provides one - element for each reduction. The results of the reductions are placed - in the \dest{} array on all \ac{PE}s in the \activeset. The \activeset{} is - defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. - - The \source{} and \dest{} arrays may be the same array, but they may not be - overlapping arrays. - - As with all \openshmem{} collective routines, each of these routines assumes - that only \ac{PE}s in the \activeset{} call the routine. If a \ac{PE} not in the - \activeset{} calls an \openshmem collective routine, undefined behavior - results. - -The values of arguments \VAR{nreduce}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \ac{PE}s in the \activeset. The same \dest{} and \source{} arrays, and the same \VAR{pWrk} and \VAR{pSync} work arrays, must be passed to all \ac{PE}s in the \activeset. 
- - Before any \ac{PE} calls a reduction routine, you must ensure that the - following conditions exist (synchronization via a \OPR{barrier} or some other - method is often needed to ensure this): The \VAR{pWrk} and \VAR{pSync} arrays on - all \ac{PE}s in the \activeset{} are not still in use from a prior call to a - collective \openshmem{} routine. The \dest{} array on all \ac{PE}s in the \activeset{} - is ready to accept the results of the \OPR{reduction}. - - Upon return from a reduction routine, the following are true for the - local \ac{PE}: The \dest{} array is updated. The values in the \VAR{pSync} array - are restored to the original values. -} -{ -{ -\hfill \\ - \desTBC{ When calling from \Fortran, the \dest{} data types are as follows:} - {Routine}{Data Type}{ - \cRow{shmem\_int8\_and\_to\_all}{Integer, with an element size of 8 bytes.} - \cRow{shmem\_int4\_and\_to\_all}{Integer, with an element size of 4 bytes.} - \cRow{shmem\_comp8\_max\_to\_all}{Complex, with an element size equal to two 8-byte real values.} - \cRow{shmem\_int4\_max\_to\_all}{Integer, with an element size of 4 bytes.} - \cRow{shmem\_int8\_max\_to\_all}{Integer, with an element size of 8 bytes.} - \cRow{shmem\_real4\_max\_to\_all}{Real, with an element size of 4 bytes.} - \cRow{shmem\_real16\_max\_to\_all}{Real, with an element size of 16 bytes.} - \cRow{shmem\_int4\_min\_to\_all}{Integer, with an element size of 4 bytes.} - \cRow{shmem\_int8\_min\_to\_all}{Integer, with an element size of 8 bytes.} - \cRow{shmem\_real4\_min\_to\_all}{Real, with an element size of 4 bytes.} - \cRow{shmem\_real8\_min\_to\_all}{Real, with an element size of 8 bytes.} - \cRow{shmem\_real16\_min\_to\_all}{Real, with an element size of 16 bytes.} - \cRow{shmem\_comp4\_sum\_to\_all}{Complex, with an element size equal to two 4-byte real values.} - \cRow{shmem\_comp8\_sum\_to\_all}{Complex, with an element size equal to two 8-byte real values.} - \cRow{shmem\_int4\_sum\_to\_all}{Integer, with an element size 
of 4 bytes.} - \cRow{shmem\_int8\_sum\_to\_all}{Integer, with an element size of 8 bytes.} - \cRow{shmem\_real4\_sum\_to\_all}{Real, with an element size of 4 bytes.} - \cRow{shmem\_real8\_sum\_to\_all}{Real, with an element size of 8 bytes.} - \cRow{shmem\_real16\_sum\_to\_all}{Real, with an element size of 16 bytes.} - \cRow{shmem\_comp4\_prod\_to\_all}{ Complex, with an element size equal to two 4-byte real values. } - \cRow{shmem\_comp8\_prod\_to\_all}{ Complex, with an element size equal to two 8-byte real values.} - \cRow{shmem\_int4\_prod\_to\_all}{Integer, with an element size of 4 bytes.} - \cRow{shmem\_int8\_prod\_to\_all}{Integer, with an element size of 8 bytes.} - \cRow{shmem\_real4\_prod\_to\_all}{Real, with an element size of 4 bytes.} - \cRow{shmem\_real8\_prod\_to\_all}{Real, with an element size of 8 bytes.} - \cRow{shmem\_real16\_prod\_to\_all}{Real, with an element size of 16 bytes.} - \cRow{shmem\_int8\_or\_to\_all}{Integer, with an element size of 8 bytes.} - \cRow{shmem\_int4\_or\_to\_all}{Integer, with an element size of 4 bytes.} -% \cRow{shmem\_comp8\_xor\_to\_all}{Complex, with an element size equal to two 8-byte real values.} -% \cRow{shmem\_comp4\_xor\_to\_all}{Complex, with an element size equal to two 4-byte real values.} - \cRow{shmem\_int8\_xor\_to\_all}{Integer, with an element size of 8 bytes.} - \cRow{shmem\_int4\_xor\_to\_all}{Integer, with an element size of 4 bytes.} -% \cRow{shmem\_real8\_xor\_to\_all}{Real, with an element size of 8 bytes.} -% \cRow{shmem\_real4\_xor\_to\_all}{Real, with an element size of 4 bytes.} - } -}%end of DesB -{%Return Values -\desR{None.} -} -% Notes. If there are no notes, this field can be left empty. -\notesB{ - All \openshmem{} reduction routines reset the values in \VAR{pSync} before they - return, so a particular \VAR{pSync} buffer need only be initialized the first - time it is used. 
- - You must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} in - the \activeset{} while any of the \ac{PE}s participate in processing of an - \openshmem{} reduction routine. Be careful to avoid the following situations: - If the \VAR{pSync} array is initialized at run time, some type of synchronization is needed to - ensure that all \ac{PE}s in the working set have initialized \VAR{pSync} before any of - them enter an \openshmem routine called - with the \VAR{pSync} synchronization array. A \VAR{pSync} or \VAR{pWrk} array can be - reused in a subsequent reduction routine call only if none of the \ac{PE}s - in the \activeset{} are still processing a prior reduction routine call - that used the same \VAR{pSync} or \VAR{pWrk} arrays. In general, this can be - assured only by doing some type of synchronization. -% However, in the -% special case of reduction routines being called with the same \activeset, -% you can allocate two \VAR{pSync} and \VAR{pWrk} arrays and alternate between -% them on successive calls. -} -} -%Example -\exampleB{ -%For each example, you can enter it as an item. 
- \exampleITEMF - { This \Fortran{} reduction example statically initializes the \VAR{pSync} array and finds the logical \OPR{AND} of the integer variable \VAR{FOO} across all even \ac{PE}s.} - {./EXAMPLES/shmem_and_example.f90} - {} - \exampleITEMF - {This \Fortran{} example statically initializes the \VAR{pSync} array - and finds the \OPR{maximum} value of real variable \VAR{FOO} across all even \ac{PE}s.} - {./EXAMPLES/shmem_max_example.f90} - {} - \exampleITEMF - { This \Fortran{} example statically initializes the \VAR{pSync} array - and finds the \OPR{minimum} value of real variable \VAR{FOO} across all the even - \ac{PE}s.} - {./EXAMPLES/shmem_min_example.f90} - {} - \exampleITEMF - {This \Fortran{} example statically initializes the \VAR{pSync} array - and finds the \OPR{sum} of the real variable \VAR{FOO} across all even \ac{PE}s.} - {./EXAMPLES/shmem_sum_example.f90} - {} - \exampleITEMF - {This \Fortran{} example statically initializes the \VAR{pSync} array - and finds the \OPR{product} of the real variable \VAR{FOO} across all the even \ac{PE}s.} - {./EXAMPLES/shmem_prod_example.f90} - {} - \exampleITEMF - {This \Fortran{} example statically initializes the \VAR{pSync} array - and finds the logical \OPR{OR} of the integer variable \VAR{FOO} across all even - \ac{PE}s.} - {./EXAMPLES/shmem_or_example.f90} - {} - \exampleITEMF - {This \Fortran{} example statically initializes the \VAR{pSync} array - and computes the exclusive \OPR{XOR} of variable \VAR{FOO} across all even \ac{PE}s.} - {./EXAMPLES/shmem_xor_example.f90} - {} - } -\eAPI - - - diff --git a/_deprecated_sources/TEX_FILES/shmem_swap.tex b/_deprecated_sources/TEX_FILES/shmem_swap.tex deleted file mode 100644 index d1c8977e2..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_swap.tex +++ /dev/null @@ -1,67 +0,0 @@ -\bAPI{SHMEM\_SWAP}{Performs an atomic swap to a remote data object.} -\synC - -double shmem_double_swap(double *dest, double value, int pe); -float shmem_float_swap(float *dest, 
float value, int pe); -int shmem_int_swap(int *dest, int value, int pe); -long shmem_long_swap(long *dest, long value, int pe); -long long shmem_longlong_swap(long long *dest, long long value, int pe); -long shmem_swap(long *dest, long value, int pe); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -INTEGER SHMEM_SWAP, value, pe -ires = SHMEM_SWAP(dest, value, pe) -INTEGER*4 SHMEM_INT4_SWAP, value_i4, ires_i4 -ires_i4 = SHMEM_INT4_SWAP(dest, value_i4, pe) -INTEGER*8 SHMEM_INT8_SWAP, value_i8, ires_i8 -ires_i8 = SHMEM_INT8_SWAP(dest, value_i8, pe) -REAL*4 SHMEM_REAL4_SWAP, value_r4, res_r4 -res_r4 = SHMEM_REAL4_SWAP(dest, value_r4, pe) -REAL*8 SHMEM_REAL8_SWAP, value_r8, res_r8 -res_r8 = SHMEM_REAL8_SWAP(dest, value_r8, pe)%*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT -%ORIGINAL - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ -\argRow{OUT}{dest}{The remotely accessible integer data object to be updated on - the remote \ac{PE}. If you are using \CorCpp, the type of \dest{} - should match that implied in the SYNOPSIS section.} -\argRow{IN}{value}{The value to be atomically written to the remote \ac{PE}. \VAR{value} is - the same type as \dest.} -\argRow{IN}{pe}{ An integer that indicates the \ac{PE} number on which \dest{} is to - be updated. If you are using \Fortran, it must be a default - integer value.} -} -%API description -{ \FUNC{shmem\_swap} performs an atomic swap operation. It writes \VAR{value} into \dest{} on \ac{PE} and returns the previous - contents of \dest{} as an atomic operation. 
-} -%API Description Table -{ -\hfill \\ -\desTB {If you are using \Fortran, \VAR{dest} must be of the following type:} -{ -\cRow{SHMEM\_SWAP}{Integer of default kind} -\cRow{SHMEM\_INT4\_SWAP}{\CONST{4}-byte integer} -\cRow{SHMEM\_INT8\_SWAP}{\CONST{8}-byte integer} -\cRow{SHMEM\_REAL4\_SWAP}{\CONST{4}-byte real} -\cRow{SHMEM\_REAL8\_SWAP}{\CONST{8}-byte real} -} - %Return Values -\desR{ - The content that had been at the \dest{} address on the remote \ac{PE} prior - to the swap is returned. -} -%end of DesB% Notes. If there are no notes, this field can be left empty. -\notesB{None.} -} -%Example -\exampleB{ - %For each example, you can enter it as an item. - \exampleITEM - {The example below swaps values between odd-numbered \acp{PE} and their right (modulo) neighbors and outputs the result of the swap.} - {./EXAMPLES/shmem_swap_example.c} -{} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shmem_version.tex b/_deprecated_sources/TEX_FILES/shmem_version.tex deleted file mode 100644 index 3fd42e60d..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_version.tex +++ /dev/null @@ -1,34 +0,0 @@ -\bAPI{SHMEM\_INFO\_GET\_VERSION}{Returns the major and minor version of the library implementation.} -\synC %Synopsis for C API - -void shmem_info_get_version(int *major, int *minor); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -SHMEM_INFO_GET_VERSION(MAJOR, MINOR) -INTEGER MAJOR, MINOR %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{OUT}{major}{The major version of the \openshmem{} standard in use.} - \argRow{OUT}{minor}{The minor version of the \openshmem{} standard in use.} - } -%API description -{ - This routine returns the major and minor version of the \openshmem{} standard in use. 
- For a given library implementation, the major and minor version returned by these calls is consistent with the compile-time constants, SHMEM\_MAJOR\_VERSION and SHMEM\_MINOR\_VERSION, defined in its shmem.h. - The valid major version value is $1$, and the valid minor version value is $2$. -} -%This newline is required -{ -%API Description Table. -\desR{ - %Return Values - None. -} -% Notes. If there are no notes, this field can be left empty. -\notesB{ None. - } -} -\eAPI - diff --git a/_deprecated_sources/TEX_FILES/shmem_wait.tex b/_deprecated_sources/TEX_FILES/shmem_wait.tex deleted file mode 100644 index d2fcd3e89..000000000 --- a/_deprecated_sources/TEX_FILES/shmem_wait.tex +++ /dev/null @@ -1,127 +0,0 @@ -\bAPI{SHMEM\_WAIT}{Wait for a variable on the local \ac{PE} to change.} -\label{subsec:shmem_wait} -\synC %Synopisis for C API - -void shmem_int_wait(int *ivar, int cmp_value); -void shmem_int_wait_until(int *ivar, int cmp, int cmp_value); -void shmem_long_wait(long *ivar, long cmp_value); -void shmem_long_wait_until(long *ivar, int cmp, long cmp_value); -void shmem_longlong_wait(long long *ivar, long long cmp_value); -void shmem_longlong_wait_until(long long *ivar, int cmp, long long cmp_value); -void shmem_short_wait(short *ivar, short cmp_value); -void shmem_short_wait_until(short *ivar, int cmp, short cmp_value); -void shmem_wait(long *ivar, long cmp_value); -void shmem_wait_until(long *ivar, int cmp, long cmp_value);%*\synCE %DO NOT DELETE. 
THIS LINE IS NOT A COMMENT -% -%void shmem_int_wait(int *var, int value); -%void shmem_int_wait_until(int *var, int cond, int value); -%void shmem_long_wait(long *var, long value); -%void shmem_long_wait_until(long *var, int cond, long value); -%void shmem_longlong_wait(long long *var, long long value); -%void shmem_longlong_wait_until(long long *var, int cond, long long value); -%void shmem_short_wait(short *var, short value); -%void shmem_short_wait_until(short *var, int cond, short value); -%void shmem_wait(long *ivar, long cmp_value); -%void shmem_wait_until(long *ivar, int cmp, long value); %*\synCE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -\synF %Synopsis for FORTRAN API - -CALL SHMEM_INT4_WAIT(ivar, cmp_value) -CALL SHMEM_INT4_WAIT_UNTIL(ivar, cmp, cmp_value) -CALL SHMEM_INT8_WAIT(ivar, cmp_value) -CALL SHMEM_INT8_WAIT_UNTIL(ivar, cmp, cmp_value) -CALL SHMEM_WAIT(ivar, cmp_value) -CALL SHMEM_WAIT_UNTIL(ivar, cmp, cmp_value) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ -\argRow{OUT}{ivar}{A remotely accessible integer variable that is being updated by - another \ac{PE}. If you are using \CorCpp, the type of \VAR{ivar} should - match that implied in the SYNOPSIS section.} -\argRow{IN}{cmp}{The compare operator that compares \VAR{ivar} with \VAR{cmp\_value}. \VAR{cmp} - must be of type integer. If you are using \Fortran, it must be - of default kind. If you are using \CorCpp, the type of \VAR{cmp} - should match that implied in the SYNOPSIS section.} -\argRow{IN}{cmp\_value}{\VAR{cmp\_value} must be of type integer. If you are using \CorCpp, the type of \VAR{cmp\_value} should match that implied in the SYNOPSIS - section. 
If you are using \Fortran, \VAR{cmp\_value} must be an - integer of the same size and kind as \VAR{ivar}.} -} -%API description -{ - \FUNC{shmem\_wait} and \FUNC{shmem\_wait\_until} wait for \VAR{ivar} to be changed by a remote - write or an atomic operation issued by a different \ac{PE}. - These routines can be used for point-to-point direct synchronization. A call to - \FUNC{shmem\_wait} does not return until some other \ac{PE} writes a value, - not equal to \VAR{cmp\_value}, into \VAR{ivar} on the waiting \ac{PE}. A call to - \FUNC{shmem\_wait\_until} does not return until some other \ac{PE} changes - \VAR{ivar} to satisfy the condition implied by \VAR{cmp} and \VAR{cmp\_value}. - This mechanism is useful when a \ac{PE} needs to tell another \ac{PE} - that it has completed some action. - The \FUNC{shmem\_wait} routines return when \VAR{ivar} is no longer equal to - \VAR{cmp\_value}. The \FUNC{shmem\_wait\_until} routines return when the compare condition is true. The compare condition is defined by the \VAR{ivar} argument compared - with the \VAR{cmp\_value} using the comparison operator, \VAR{cmp}. - } - %API Description Table. 
-{ -\hfill \\ - % If there is no Description Table and return, this field can be - \desTBC{ If you are using \Fortran, \VAR{ivar} must be a specific sized integer type according to the routine being called, as follows:} - {Routine}{Type of \VAR{ivar}} - { - %\cRow{Function}{Type of ivar} - \cRow{shmem\_wait, shmem\_wait\_until}{default INTEGER} - \cRow{shmem\_int4\_wait, shmem\_int4\_wait\_until}{INTEGER*4} -\cRow{shmem\_int8\_wait, shmem\_int8\_wait\_until}{INTEGER*8} -} - -\desTBC{ The following \VAR{cmp} values are supported:}{CMP Value}{Comparison} - { -%\cRow{cmp Value}{Comparison} -\CorCpp:\\ -\cRow{\_SHMEM\_CMP\_EQ }{ Equal} -\cRow{\_SHMEM\_CMP\_NE}{Not equal} -\cRow{\_SHMEM\_CMP\_GT}{Greater than} -\cRow{\_SHMEM\_CMP\_LE}{Less than or equal to} -\cRow{\_SHMEM\_CMP\_LT}{Less than} -\cRow{\_SHMEM\_CMP\_GE}{Greater than or equal to} -\\ -\Fortran:\\ -\cRow{SHMEM\_CMP\_EQ }{ Equal} -\cRow{SHMEM\_CMP\_NE}{Not equal} -\cRow{SHMEM\_CMP\_GT}{Greater than} -\cRow{SHMEM\_CMP\_LE}{Less than or equal to} -\cRow{SHMEM\_CMP\_LT}{Less than} -\cRow{SHMEM\_CMP\_GE}{Greater than or equal to} -} -%Return Values -\desR{None.} -%end Description Table. -\notesImp{ - Implementations must ensure that \FUNC{shmem\_wait} and - \FUNC{shmem\_wait\_until} do not return before the update of the memory - indicated by \VAR{ivar} is fully complete. Partial updates to the memory - must not cause \FUNC{shmem\_wait} or \FUNC{shmem\_wait\_until} to return. -} -} -\exampleB{ -%For each example, you can enter it as an item. 
- \exampleITEMF - { The following call returns when variable ivar is not equal to 100:} -{./EXAMPLES/shmem_wait1_example.f90} - {} - \exampleITEMF - { The following call to \FUNC{SHMEM\_INT8\_WAIT\_UNTIL} is equivalent to the call to \FUNC{SHMEM\_INT8\_WAIT} in example 1:} -{./EXAMPLES/shmem_wait2_example.f90} - {} - \exampleITEMF - {The following \CorCpp{} call waits until the sign bit in ivar - is set by a transfer from a remote PE:} -{./EXAMPLES/shmem_wait3_example.f90} - {} - \exampleITEMF - {The following \Fortran{} example is in the context of a - subroutine:} -{./EXAMPLES/shmem_wait4_example.f90} - {} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shpalloc.tex b/_deprecated_sources/TEX_FILES/shpalloc.tex deleted file mode 100644 index 7152ff3dc..000000000 --- a/_deprecated_sources/TEX_FILES/shpalloc.tex +++ /dev/null @@ -1,55 +0,0 @@ -\bAPI{SHPALLOC}{Allocates a block of memory from the symmetric heap.} -\synF %Synopsis for FORTRAN API - -POINTER (addr, A(1)) -INTEGER length, errcode, abort -CALL SHPALLOC(addr, length, errcode, abort) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{OUT}{addr}{First word address of the allocated block.} - \argRow{IN}{length}{Number of words of memory requested. One word is 32 bits.} - \argRow{OUT}{errcode}{Error code is \CONST{0} if no error was detected; otherwise, it is a negative integer code for the type of error.} - \argRow{IN}{abort}{Abort code; nonzero requests abort on error; \CONST{0} requests - an error code.} -} - %API description - { - \FUNC{SHPALLOC} allocates a block of memory from the program's symmetric heap - that is greater than or equal to the size requested. To maintain symmetric heap - consistency, all \ac{PE}s in a program must call \FUNC{SHPALLOC} with the same value of \VAR{length}; if any \ac{PE}s are missing, the program will hang. 
- - By using the \Fortran{} \CONST{POINTER} mechanism in the following manner, you can use array \VAR{A} to refer to the block allocated by \FUNC{SHPALLOC}: \CONST{POINTER} (\VAR{addr}, - \VAR{A}()) - } - %API Description Table. -{ -{ - %Return Values -\desR{ } -\cRow{Error Code} {Condition} -\cRow{ \CONST{-1} } {Length is not an integer greater than \CONST{0}} -\cRow{\CONST{-2}} { No more memory is available from the system (checked if the request cannot be satisfied from the available blocks on the symmetric heap).} -}%end of DesR -\notesB{ - The total size of the symmetric heap is determined at job startup. One - may adjust the size of the heap using the \CONST{SMA\_SYMMETRIC\_SIZE} - environment variable (if available). -} - \notesImp{ - The symmetric heap allocation functions always return a pointer to - corresponding symmetric objects across all PEs. The \openshmem{} - specification does not require that the virtual addresses are equal across - all \acp{PE}. Nevertheless, the implementation must avoid costly address - translation operations in the communication path, including order $N$ (where - $N$ is the number of \acp{PE}) memory translation tables. - In order to avoid address translations, the implementation may - re-map the allocated block of memory based on agreed virtual address. - Additionally, some operating systems provide an option to disable - virtual address randomization, which enables predictable allocation - of virtual memory addresses. 
- } -} -%Example -%\exampleB{} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shpclmove.tex b/_deprecated_sources/TEX_FILES/shpclmove.tex deleted file mode 100644 index a9cddd195..000000000 --- a/_deprecated_sources/TEX_FILES/shpclmove.tex +++ /dev/null @@ -1,42 +0,0 @@ -\bAPI{SHPCLMOVE}{Extends a symmetric heap block or copies the contents of the block into a larger block.} -\synF %Synopsis for FORTRAN API - -POINTER (addr, A(1)) -INTEGER length, status, abort -CALL SHPCLMOVE (addr, length, status, abort) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{INOUT}{addr}{On entry, first word address of the block to change; on - exit, the new address of the block if it was moved.} - \argRow{IN}{length}{Requested new total length in words. One word - is \CONST{32} bits.} - \argRow{OUT}{status}{Status is \CONST{0} if the block was extended in place, \CONST{1} if it - was moved, and a negative integer for the type of error detected.} - \argRow{IN}{abort}{Abort code. Nonzero requests abort on error; \CONST{0} requests - an error code.} -} -%API description - { The \FUNC{SHPCLMOVE} routine either extends a symmetric heap block if the - block is followed by a large enough free block or copies the contents - of the existing block to a larger block and returns a status code - indicating that the block was moved. This routine also can reduce the - size of a block if the new length is less than the old length. All - \ac{PE}s in a program must call \FUNC{SHPCLMOVE} with the - same value of \VAR{addr} to maintain symmetric heap consistency; if any \ac{PE}s - are missing, the program hangs. - } - %API Description Table. 
-{ - %Return Values -\desR{ -\cRow{Error Code} {Condition} -\cRow{\CONST{-1} } {Length is not an integer greater than \CONST{0}} -\cRow{\CONST{-2}} { No more memory is available from the system (checked if the request cannot be satisfied from the available blocks on the symmetric heap).} -\cRow{\CONST{-3}}{ Address is outside the bounds of the symmetric heap.} -\cRow{\CONST{-4}}{Block is already free.} -\cRow{\CONST{-5}}{Address is not at the beginning of a block.} -}%end of DesB -\notesB{None.} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/shpdeallc.tex b/_deprecated_sources/TEX_FILES/shpdeallc.tex deleted file mode 100644 index 85a15df40..000000000 --- a/_deprecated_sources/TEX_FILES/shpdeallc.tex +++ /dev/null @@ -1,35 +0,0 @@ -\bAPI{SHPDEALLC}{Returns a memory block to the symmetric heap.} -\synF %Synopsis for FORTRAN API - -POINTER (addr, A(1)) -INTEGER errcode, abort -CALL SHPDEALLC(addr, errcode, abort) %*\synFE %DO NOT DELETE. THIS LINE IS NOT A COMMENT - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{IN}{addr}{ First word address of the block to deallocate.} - \argRow{OUT}{errcode}{Error code is 0 if no error was detected; otherwise, it is a negative integer code for the type of error.} - \argRow{IN}{abort}{Abort code. Nonzero requests abort on error; \CONST{0} requests - an error code.} - } -%API description - { - SHPDEALLC returns a block of memory (allocated using \FUNC{SHPALLOC}) to the - list of available space in the symmetric heap. To maintain symmetric - heap consistency, all \ac{PE}s in a program must call - \FUNC{SHPDEALLC} with the same value of \VAR{addr}; if any \ac{PE}s are missing, the program hangs. - } -%API Description Table. 
-{ - %Return Values -\desR{ -\cRow{Error Code} {Condition} -\cRow{ \CONST{-1} } {Length is not an integer greater than 0} -\cRow{\CONST{-2}} { No more memory is available from the system (checked if the request cannot be satisfied from the available blocks on the symmetric heap).} -\cRow{\CONST{-3}}{ Address is outside the bounds of the symmetric heap.} -\cRow{\CONST{-4}}{Block is already free.} -\cRow{\CONST{-5}}{Address is not at the beginning of a block.} -}%end of DesB -\notesB{None.} -} -\eAPI diff --git a/_deprecated_sources/TEX_FILES/start_pes.tex b/_deprecated_sources/TEX_FILES/start_pes.tex deleted file mode 100644 index d55739661..000000000 --- a/_deprecated_sources/TEX_FILES/start_pes.tex +++ /dev/null @@ -1,47 +0,0 @@ -\bAPI{START\_PES}{ Called at the beginning of an \openshmem program to -initialize the execution environment. (This routine is deprecated and is -provided for backwards compatibility. Implementations must include it, and the -routines should function properly while notifying the user about deprecation -of its use.)} -\synC -void start_pes(int npes); %*\synCE -\synF -CALL START_PES(npes) %*\synFE - -% Arguments table. If no arguments you can use \argRow{None}{}{} -\desB{ - \argRow{npes}{Unused}{ Should be set to \CONST{0}.} -} - %API description - { - The \FUNC{start\_pes} routine initializes the \openshmem execution - environment. An \openshmem program must call - \FUNC{start\_pes} before calling any other \openshmem routine. - } - %API Description Table. -{ - %Return Values -\desR{ None. } -\notesB{ - If any other \openshmem call occurs before \FUNC{start\_pes}, the - behavior is undefined. Although it is recommended to set \VAR{npes} to - \CONST{0} for \FUNC{start\_pes}, this is not mandated. The value is ignored. - Calling \FUNC{start\_pes} more than once has no subsequent - effect. -} -}%end of DesB -% Notes. If there are no notes, this field can be left empty. 
- \notesB{ - As of \openshmem Specification 1.2 the use of \FUNC{start\_pes} has - been deprecated. Although \openshmem libraries are required to support the - call, program users are encouraged to use \FUNC{shmem\_init} instead. -} -%Example -\exampleB{ -%For each example, you can enter it as an item. -\exampleITEMF - { This is a simple program that calls \FUNC{start\_pes}:} - {./EXAMPLES/shmem_startpes_example.f90} - {} -} -\eAPI diff --git a/_deprecated_sources/coverpage.tex b/_deprecated_sources/coverpage.tex deleted file mode 100644 index 9aabd73dc..000000000 --- a/_deprecated_sources/coverpage.tex +++ /dev/null @@ -1,99 +0,0 @@ -\thispagestyle{empty} -\begin{center} -\textbf{\Huge \openshmem} -\par -\end{center} - -\begin{center} -\textbf{\LARGE Application Programming Interface}\\ -\includegraphics[scale=0.65]{OpenSHMEM_Pound}\\ -\url{http://www.openshmem.org/} -\par -\end{center} - -\begin{center} -Version \insertDocVersion -\par -\end{center} - -\vspace{0.5in} -\begin{center} -\today -\end{center} - -\vspace{0.5in} - -%\begin{center} -%\textbf{{\large The comment period for \textcolor{red}{Version \insertDocVersion{}} ended on \textcolor{red}{January 31, 2012}}} -%\par -%\textbf{{\large The comment period for \textcolor{blue}{Version 1.1} started on \textcolor{blue}{February 1, 2012}}} -%\par -%\end{center} - -\vfill{} - -\section*{Developed by} -\begin{itemize} -\item High Performance Computing Tools group at the University of Houston\\ - \url{http://www.cs.uh.edu/~hpctools/} -\item Extreme Scale Systems Center, Oak Ridge National Laboratory\\ - \url{http://www.csm.ornl.gov/essc/} -\end{itemize} -\pagebreak{} - -\section*{Sponsored by} -\begin{itemize} -\item \ac{DoD}\\ - \url{http://www.defense.gov/ } -\item \ac{ORNL}\\ - \url{http://www.ornl.gov/} -\end{itemize} - -\section*{Authors and Collaborators} -\begin{itemize} -\item Monika ten Bruggencate, Cray Inc. -\item David Knaak, Cray Inc. 
-\item Jens Manser, \ac{DoD} -\item Nick Park, \ac{DoD} -\item Lauren Smith, \ac{DoD} -\item James Dinan, Intel -\item Jeff Hammond, Intel -\item Jeff Kuehn, \ac{LANL} -\item Swaroop Pophale, Mellanox -\item Eduardo D'Azevedo, \ac{ORNL} -\item Manjunath Gorentla Venkata, \ac{ORNL} -\item Oscar Hernandez, \ac{ORNL} -\item Gregory Koenig, \ac{ORNL} -\item Graham Lopez, \ac{ORNL} -\item Pavel Shamis, \ac{ORNL} -\item Steve Poole, OSSS -\item Karl Feind, SGI -\item Michael Raymond, SGI -\item Barbara Chapman, \ac{UH} -\item Tony Curtis, \ac{UH} -\item Sameer Shende, \ac{UO} -\end{itemize} - -\date{\today} - -\section*{Acknowledgements} -The \openshmem specification belongs to Open Source Software Solutions, Inc. (OSSS), a non-profit organization, under an agreement with SGI. The development work of the specification is supported by the Oak Ridge National Laboratory Extreme Scale Systems Center and the Department of Defense.\\ -\\ -We would also like to acknowledge the contribution of the members of the \openshmem mailing list for their ideas, discussions, suggestions, and constructive criticism which has helped us improve this document. - - -%The following people (listed alphabetically) have contributed ideas, -%criticisms and suggestions on the openshmem mailing list and in other -%fora: -% -%Vikas Aggarwal; Brian W. Barrett; Christian Bell; Max Billingsley -%III; Mark Debbage; Mike Dubman; Dick Foster; Hal Finkel; Roger A. -%Golliver; Jeff Hammond; Alistair Hart; Tsai-yang Jea; Daniel Kidger; -%Rishi Khan; David LaFrance-Linden; John Leidel; Alexander Mikheev; -%Chen Qi; Duncan Roweth; Sameer Shende; Marc Snir; Lawrence Stewart; -%Keith D. Underwood; Brian Wibecan. -% -%Apologies to people who have contributed but who are not acknowledged -%here: it is not intentional. 
- -{\large \pagebreak{}} diff --git a/_deprecated_sources/diagrams/fence.png b/_deprecated_sources/diagrams/fence.png deleted file mode 100644 index d8e53d510..000000000 Binary files a/_deprecated_sources/diagrams/fence.png and /dev/null differ diff --git a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_fence.graffle b/_deprecated_sources/diagrams/mp_lc_unicos_shmem_fence.graffle deleted file mode 100755 index da3aa17da..000000000 Binary files a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_fence.graffle and /dev/null differ diff --git a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_fence.pdf b/_deprecated_sources/diagrams/mp_lc_unicos_shmem_fence.pdf deleted file mode 100755 index 72ad7cc15..000000000 Binary files a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_fence.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_quiet.graffle b/_deprecated_sources/diagrams/mp_lc_unicos_shmem_quiet.graffle deleted file mode 100755 index c304675f8..000000000 Binary files a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_quiet.graffle and /dev/null differ diff --git a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_quiet.pdf b/_deprecated_sources/diagrams/mp_lc_unicos_shmem_quiet.pdf deleted file mode 100755 index 0224f0931..000000000 Binary files a/_deprecated_sources/diagrams/mp_lc_unicos_shmem_quiet.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/mpi_fence.graffle b/_deprecated_sources/diagrams/mpi_fence.graffle deleted file mode 100755 index 9ad1adf60..000000000 Binary files a/_deprecated_sources/diagrams/mpi_fence.graffle and /dev/null differ diff --git a/_deprecated_sources/diagrams/mpi_fence.pdf b/_deprecated_sources/diagrams/mpi_fence.pdf deleted file mode 100755 index 4fc5bc940..000000000 Binary files a/_deprecated_sources/diagrams/mpi_fence.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/q_unicos_shmem_fence.graffle b/_deprecated_sources/diagrams/q_unicos_shmem_fence.graffle deleted file 
mode 100755 index e96b4819e..000000000 Binary files a/_deprecated_sources/diagrams/q_unicos_shmem_fence.graffle and /dev/null differ diff --git a/_deprecated_sources/diagrams/q_unicos_shmem_fence.pdf b/_deprecated_sources/diagrams/q_unicos_shmem_fence.pdf deleted file mode 100755 index 1f229d576..000000000 Binary files a/_deprecated_sources/diagrams/q_unicos_shmem_fence.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/q_unicos_shmem_quiet.graffle b/_deprecated_sources/diagrams/q_unicos_shmem_quiet.graffle deleted file mode 100755 index e5d539a6b..000000000 Binary files a/_deprecated_sources/diagrams/q_unicos_shmem_quiet.graffle and /dev/null differ diff --git a/_deprecated_sources/diagrams/q_unicos_shmem_quiet.pdf b/_deprecated_sources/diagrams/q_unicos_shmem_quiet.pdf deleted file mode 100755 index 2986e0d97..000000000 Binary files a/_deprecated_sources/diagrams/q_unicos_shmem_quiet.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/quiet.png b/_deprecated_sources/diagrams/quiet.png deleted file mode 100644 index 6d1c1f347..000000000 Binary files a/_deprecated_sources/diagrams/quiet.png and /dev/null differ diff --git a/_deprecated_sources/diagrams/symmetric-variables.pdf b/_deprecated_sources/diagrams/symmetric-variables.pdf deleted file mode 100644 index ec297cd87..000000000 Binary files a/_deprecated_sources/diagrams/symmetric-variables.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/updated/barrier.graffle b/_deprecated_sources/diagrams/updated/barrier.graffle deleted file mode 100644 index 7017d98ed..000000000 --- a/_deprecated_sources/diagrams/updated/barrier.graffle +++ /dev/null @@ -1,1703 +0,0 @@ - - - - - ActiveLayerIndex - 0 - ApplicationVersion - - com.omnigroup.OmniGraffle - 139.18.0.187838 - - AutoAdjust - - BackgroundGraphic - - Bounds - {{0, 0}, {1152, 733}} - Class - SolidGraphic - ID - 2 - Style - - shadow - - Draws - NO - - stroke - - Draws - NO - - - - BaseZoom - 0 - CanvasOrigin - {0, 0} - 
ColumnAlign - 1 - ColumnSpacing - 36 - CreationDate - 2014-02-04 17:59:39 +0000 - Creator - Pavel Shamis - DisplayScale - 1 0/72 in = 1.0000 in - GraphDocumentVersion - 8 - GraphicsList - - - Bounds - {{1019.19970703125, 439.24954986572266}, {10, 86.411979675292969}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 111 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{923.19970703125, 402.41136169433594}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 110 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_put(\'85)} - - - - Bounds - {{1019.19970703125, 175.4190788269043}, {10, 220.25688171386719}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 108 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{923.19970703125, 137.69076538085938}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 104 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_get (...)} - - - - Bounds - {{989.19970962173875, 33}, {70, 39}} - Class - 
ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 103 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE K} - VerticalPad - 0 - - - - Bounds - {{1019.19970703125, 84.500225067138672}, {10, 47.345275149512176}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 102 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{783.9970118678325, 503.57363891601562}, {10, 22.087890625}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 91 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{306.002993313145, 363.29468536376953}, {10, 16.499774932861328}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 90 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{782.099853515625, 175.4190788269043}, {10, 86.580917358398438}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 89 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{306.00299072265625, 227.29430389404297}, {10, 34.705696105957031}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 88 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{307.900146484375, 
145.91264724731445}, {10, 34.705696105957031}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 87 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{687.99700927734375, 465}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 85 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (...)} - - - - Bounds - {{211.900146484375, 392}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 84 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_get (...)} - - - - Bounds - {{211.900146484375, 109.61833953857422}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 83 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} 
-\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_fadd(...)} - - - - Bounds - {{211.900146484375, 191}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 82 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (...)} - - - - Bounds - {{211.900146484375, 272.38165855407715}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 81 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_barrier(...)} - - - - Bounds - {{686.99888515472412, 272.24234842703663}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 79 - Line - - ID - 56 - Position - 0.6342540979385376 - RotationType - 0 - - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_barrier(...)} - - - - 
Bounds - {{211.900146484375, 314.83816909790039}, {676.19970703125, 39}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 78 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\deftab720 -\pard\pardeftab720\sa240\qc - -\f0\fs32 \cf0 All local and remote memory operations issued by PEs are guaranteed to be completed before any PE returns from the call.} - - - - Class - LineGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 77 - Points - - {277.90014907486375, 286} - {449, 284.7196044921875} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - - - Bounds - {{306.00299072265625, 431.66152954101562}, {10, 94}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 76 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{277.90014907486375, 33}, {70, 39}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 75 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 2} - VerticalPad - 0 - - - - Bounds - {{307.900146484375, 84.500225067138672}, {10, 16.499774932861328}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 86 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - 
Rectangle - - - Class - LineGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 56 - Points - - {651, 286.21963500976562} - {867, 286} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - Tail - - ID - 45 - Info - 3 - - - - Bounds - {{782.099853515625, 368}, {10, 87}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 53 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{545.00000259048875, 497.98556518554688}, {10, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 52 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{449, 454.54486083984375}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 51 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_p (...)} - - - - Bounds - {{449, 411.10415649414062}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 50 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 
-\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (...)} - - - - Bounds - {{545.00000259048875, 368}, {10, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 49 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{686.099853515625, 137.69076538085938}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 48 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_add (...)} - - - - Bounds - {{449, 158.35284042358398}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 47 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_put(\'85)} - - - - Bounds - {{449, 272.38165283203125}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 45 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - 
stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_barrier(...)} - - - - Bounds - {{449, 124.67642211914062}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 36 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (...)} - - - - Bounds - {{545.00000259048875, 197}, {10, 65}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 35 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{545, 84.500228881835938}, {10, 34.705696105957031}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 33 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{752.09985610611375, 33}, {70, 39}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 30 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 1} - VerticalPad - 0 - - - - Bounds - {{515, 33}, {70, 39}} - Class - ShapedGraphic - FontInfo - - Font - 
Helvetica - Size - 12 - - ID - 28 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 0} - VerticalPad - 0 - - - - Bounds - {{782.099853515625, 84.500225067138672}, {10, 47.345275149512176}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 12 - - ID - 1 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{195, 18}, {711, 541}} - Class - ShapedGraphic - FitText - Clip - Flow - Clip - FontInfo - - Font - Helvetica - Size - 12 - - ID - 4 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Draws - NO - - shadow - - Draws - NO - - stroke - - CornerRadius - 5 - Pattern - 11 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\b\fs36 \cf0 Active Set} - - TextPlacement - 2 - - - GridInfo - - GuidesLocked - NO - GuidesVisible - YES - HPages - 2 - ImageCounter - 1 - KeepToScale - - Layers - - - Lock - NO - Name - Layer 1 - Print - YES - View - YES - - - LayoutInfo - - Animate - NO - circoMinDist - 18 - circoSeparation - 0.0 - layoutEngine - dot - neatoSeparation - 0.0 - twopiSeparation - 0.0 - - LinksVisible - NO - MagnetsVisible - NO - MasterSheets - - ModificationDate - 2014-02-21 22:19:56 +0000 - Modifier - Shamis, Pavel - NotesVisible - NO - Orientation - 2 - OriginVisible - NO - PageBreaks - YES - PrintInfo - - NSBottomMargin - - float - 41 - - NSHorizonalPagination - - coded - 
BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG - - NSLeftMargin - - float - 18 - - NSPaperSize - - size - {612, 792} - - NSPrintReverseOrientation - - int - 0 - - NSRightMargin - - float - 18 - - NSTopMargin - - float - 18 - - - PrintOnePage - - ReadOnly - NO - RowAlign - 1 - RowSpacing - 36 - SheetTitle - Canvas 1 - SmartAlignmentGuidesActive - YES - SmartDistanceGuidesActive - YES - UniqueID - 1 - UseEntirePage - - VPages - 1 - WindowInfo - - CurrentSheet - 0 - ExpandedCanvases - - - name - Canvas 1 - - - Frame - {{128, 186}, {1396, 925}} - ListView - - OutlineWidth - 142 - RightSidebar - - ShowRuler - - Sidebar - - SidebarWidth - 120 - VisibleRegion - {{-55, -26}, {1261, 786}} - Zoom - 1 - ZoomValues - - - Canvas 1 - 1 - 1 - - - - - diff --git a/_deprecated_sources/diagrams/updated/barrier.pdf b/_deprecated_sources/diagrams/updated/barrier.pdf deleted file mode 100644 index b4440cdf1..000000000 Binary files a/_deprecated_sources/diagrams/updated/barrier.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/updated/barrierall.graffle b/_deprecated_sources/diagrams/updated/barrierall.graffle deleted file mode 100644 index 18c0ea2a0..000000000 --- a/_deprecated_sources/diagrams/updated/barrierall.graffle +++ /dev/null @@ -1,1962 +0,0 @@ - - - - - ActiveLayerIndex - 0 - ApplicationVersion - - com.omnigroup.OmniGraffle - 139.18.0.187838 - - AutoAdjust - - BackgroundGraphic - - Bounds - {{0, 0}, {1152, 733}} - Class - SolidGraphic - ID - 2 - Style - - shadow - - Draws - NO - - stroke - - Draws - NO - - - - BaseZoom - 0 - CanvasOrigin - {0, 0} - ColumnAlign - 1 - ColumnSpacing - 36 - CreationDate - 2014-02-04 17:59:39 +0000 - Creator - Pavel Shamis - DisplayScale - 1 0/72 in = 1.0000 in - GraphDocumentVersion - 8 - GraphicsList - - - Bounds - {{807.74809122085571, 269.31585693359375}, {22.6015625, 22.087890625}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 122 - Shape - Circle - 
Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - VerticalPad - 0 - - - - Bounds - {{765.79851770401001, 269.31585693359375}, {22.6015625, 22.087890625}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 121 - Shape - Circle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - VerticalPad - 0 - - - - Bounds - {{723.84894418716431, 269.31585693359375}, {22.6015625, 22.087890625}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 120 - Shape - Circle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - VerticalPad - 0 - - - - Bounds - {{938.199770656895, 455.58792209625244}, {10, 65.0736083984375}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 116 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{842.19976806640625, 417.83816528320312}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 115 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_p (...)} - - - - Bounds - {{936.3026123046875, 360.41908264160156}, {10, 
47.345275149512176}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 114 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{842.19973564147949, 266.52180551932179}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 113 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_barrier_all(\'85)} - - - - Bounds - {{938.19976806640625, 170.4190788269043}, {10, 86.580917358398438}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 108 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{842.19976806640625, 132.69076538085938}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 104 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_get (...)} - - - - Bounds - {{908.199770656895, 28}, {70, 39}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 103 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 
-\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE K} - VerticalPad - 0 - - - - Bounds - {{938.19976806640625, 79.500225067138672}, {10, 47.345275149512176}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 102 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{606.9970118678325, 498.57363891601562}, {10, 22.087890625}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 91 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{129.002993313145, 358.29468536376953}, {10, 16.499774932861328}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 90 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{605.099853515625, 170.4190788269043}, {10, 86.580917358398438}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 89 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{129.00299072265625, 222.29430389404297}, {10, 34.705696105957031}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 88 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{130.900146484375, 140.91264724731445}, {10, 34.705696105957031}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 87 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} 
- {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{510.99700927734375, 460}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 85 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (...)} - - - - Bounds - {{39, 387}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 84 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_get (...)} - - - - Bounds - {{34.900146484375, 104.61833953857422}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 83 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_fadd(...)} - - - - Bounds - {{34.900146484375, 186}, {202, 27.67596435546875}} - 
Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 82 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (...)} - - - - Bounds - {{33.002983093261719, 267.38165855407715}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 81 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_barrier_all(\'85)} - - - - Bounds - {{509.99888515472412, 267.24234842703663}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 79 - Line - - ID - 56 - Position - 0.6342540979385376 - RotationType - 0 - - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_barrier_all(\'85)} - - - - Bounds - {{39, 312.83816909790039}, {1005.1997680664062, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 78 - Shape - Rectangle - Style - - fill - - Color - - b 
- 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\deftab720 -\pard\pardeftab720\sa240\qc - -\f0\fs32 \cf0 All local and remote memory operations issued by PEs are guaranteed to be completed before any PE returns from the call.} - - - - Class - LineGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 77 - Points - - {100.90014907486375, 281} - {272, 279.7196044921875} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - - - Bounds - {{129.00299072265625, 426.66152954101562}, {10, 94}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 76 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{100.90014907486375, 28}, {70, 39}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 75 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 2} - VerticalPad - 0 - - - - Bounds - {{130.900146484375, 79.500225067138672}, {10, 16.499774932861328}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 86 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Class - LineGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 56 - Points - - {474, 281.21963500976562} - {690, 281} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - 
TailArrow - 0 - - - Tail - - ID - 45 - Info - 3 - - - - Bounds - {{605.099853515625, 363}, {10, 87}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 53 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{368.00000259048875, 492.98556518554688}, {10, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 52 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{272, 449.54486083984375}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 51 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_p (...)} - - - - Bounds - {{272, 406.10415649414062}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 50 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 
\cf0 shmem_int_p (...)} - - - - Bounds - {{368.00000259048875, 363}, {10, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 49 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{509.099853515625, 132.69076538085938}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 48 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_add (...)} - - - - Bounds - {{272, 153.35284042358398}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 47 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_long_put(\'85)} - - - - Bounds - {{272, 267.38165283203125}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 45 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} 
-\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_barrier_all(\'85)} - - - - Bounds - {{272, 119.67642211914062}, {202, 27.67596435546875}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 36 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (...)} - - - - Bounds - {{368.00000259048875, 192}, {10, 65}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 35 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{368, 79.500228881835938}, {10, 34.705696105957031}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 33 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{575.09985610611375, 28}, {70, 39}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 30 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 1} - VerticalPad - 0 - - - - Bounds - {{338, 28}, {70, 39}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 28 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
-{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 0} - VerticalPad - 0 - - - - Bounds - {{605.099853515625, 79.500225067138672}, {10, 47.345275149512176}} - Class - ShapedGraphic - FontInfo - - Font - Helvetica - Size - 16 - - ID - 1 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{19, 20}, {1039, 541}} - Class - ShapedGraphic - FitText - Clip - Flow - Clip - FontInfo - - Font - Helvetica - Size - 16 - - ID - 112 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Draws - NO - - shadow - - Draws - NO - - stroke - - CornerRadius - 5 - Pattern - 11 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\b\fs36 \cf0 All PEs} - - TextPlacement - 2 - - - GridInfo - - GuidesLocked - NO - GuidesVisible - YES - HPages - 2 - ImageCounter - 1 - KeepToScale - - Layers - - - Lock - NO - Name - Layer 1 - Print - YES - View - YES - - - LayoutInfo - - Animate - NO - circoMinDist - 18 - circoSeparation - 0.0 - layoutEngine - dot - neatoSeparation - 0.0 - twopiSeparation - 0.0 - - LinksVisible - NO - MagnetsVisible - NO - MasterSheets - - ModificationDate - 2014-02-21 22:20:28 +0000 - Modifier - Shamis, Pavel - NotesVisible - NO - Orientation - 2 - OriginVisible - NO - PageBreaks - YES - PrintInfo - - NSBottomMargin - - float - 41 - - NSHorizonalPagination - - coded - BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG - - NSLeftMargin - - float - 18 - - NSPaperSize - - size - {612, 792} - - NSPrintReverseOrientation - - int - 0 - - NSRightMargin - - float - 18 - - NSTopMargin - - 
float - 18 - - - PrintOnePage - - ReadOnly - NO - RowAlign - 1 - RowSpacing - 36 - SheetTitle - Canvas 1 - SmartAlignmentGuidesActive - YES - SmartDistanceGuidesActive - YES - UniqueID - 1 - UseEntirePage - - VPages - 1 - WindowInfo - - CurrentSheet - 0 - ExpandedCanvases - - - name - Canvas 1 - - - Frame - {{148, 166}, {1396, 925}} - ListView - - OutlineWidth - 142 - RightSidebar - - ShowRuler - - Sidebar - - SidebarWidth - 120 - VisibleRegion - {{-55, -26}, {1261, 786}} - Zoom - 1 - ZoomValues - - - Canvas 1 - 1 - 1 - - - - - diff --git a/_deprecated_sources/diagrams/updated/barrierall.pdf b/_deprecated_sources/diagrams/updated/barrierall.pdf deleted file mode 100644 index 261b205c8..000000000 Binary files a/_deprecated_sources/diagrams/updated/barrierall.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/updated/fence.graffle b/_deprecated_sources/diagrams/updated/fence.graffle deleted file mode 100644 index b55334482..000000000 --- a/_deprecated_sources/diagrams/updated/fence.graffle +++ /dev/null @@ -1,1347 +0,0 @@ - - - - - ActiveLayerIndex - 0 - ApplicationVersion - - com.omnigroup.OmniGraffle - 139.18.0.187838 - - AutoAdjust - - BackgroundGraphic - - Bounds - {{0, 0}, {1152, 733}} - Class - SolidGraphic - ID - 2 - Style - - shadow - - Draws - NO - - stroke - - Draws - NO - - - - BaseZoom - 0 - CanvasOrigin - {0, 0} - ColumnAlign - 1 - ColumnSpacing - 36 - CreationDate - 2014-02-04 16:40:34 +0000 - Creator - Pavel Shamis - DisplayScale - 1 0/72 in = 1.0000 in - GraphDocumentVersion - 8 - GraphicsList - - - Bounds - {{692.08169311687629, 147.13285578463007}, {80.236351013183594, 15}} - Class - ShapedGraphic - ID - 72 - Rotation - 23.090459823608398 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - 
ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{126, 422.08683776855469}, {156, 33}} - Class - ShapedGraphic - ID - 71 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 value5 will be delivered -\b after -\b0 value2 } - - - - Bounds - {{816, 408.53243383911206}, {156, 52.123282103270782}} - Class - ShapedGraphic - ID - 70 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 value4 will be delivered -\b after -\b0 value1 and value3} - - - - Bounds - {{816, 221.22673429816666}, {156, 52.162608963552088}} - Class - ShapedGraphic - ID - 69 - Shape - Rectangle - Style - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 value1 and value3 are delivered to PE1, -\b before -\b0 value4 } - - - - Bounds - {{124, 234.38934326171875}, 
{160, 39}} - Class - ShapedGraphic - ID - 67 - Shape - Rectangle - Style - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 value2 is delivered to PE2, -\b before -\b0 value5} - - - - Bounds - {{318.38364287880222, 428.35002387956683}, {94.032638549804688, 15}} - Class - ShapedGraphic - ID - 65 - Rotation - 150.97589111328125 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Class - LineGraphic - ID - 63 - Points - - {234.5, 286.7196044921875} - {449, 287.21958541870117} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - - - Bounds - {{307.90014171600342, 285.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 62 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{277.90014907486375, 33}, {70, 39}} - Class - ShapedGraphic - ID - 61 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 
\cf0 PE 2} - VerticalPad - 0 - - - - Bounds - {{307.90014171600342, 84.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 60 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{688.72929771064776, 378.17150587954552}, {87.757164001464844, 15}} - Class - ShapedGraphic - ID - 57 - Rotation - 26.135757446289062 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Class - LineGraphic - ID - 56 - Points - - {648.599853515625, 286.21963500976562} - {865.5, 286.7196159362793} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - Tail - - ID - 45 - Info - 3 - - - - Bounds - {{321.78649287866045, 186.12392821924863}, {87.226936340332031, 15}} - Class - ShapedGraphic - ID - 54 - Rotation - 153.6458740234375 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{782.099853515625, 285.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 53 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - 
- - Bounds - {{545.00000259048875, 442.54342651367188}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 52 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{417.7000732421875, 397.63874816894531}, {264.599853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 51 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr5, value5, PE 2)} - - - - Bounds - {{417.7000732421875, 352.73406982421875}, {264.599853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 50 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr4, value4, PE 1)} - - - - Bounds - {{545.00000259048875, 312.557861328125}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 49 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{417.7000732421875, 192.02925872802734}, {264.599853515625, 27.67596435546875}} 
- Class - ShapedGraphic - ID - 48 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr3, value3, PE 1)} - - - - Bounds - {{417.7000732421875, 158.35284042358398}, {264.599853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 47 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr2, value2, PE 2)} - - - - Bounds - {{446.599853515625, 272.38165283203125}, {202, 27.67596435546875}} - Class - ShapedGraphic - ID - 45 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_fence()} - - - - Bounds - {{417.7000732421875, 124.67642211914062}, {264.599853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 36 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
-{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr1, value1, PE 1)} - - - - Bounds - {{545.00000259048875, 232.20545196533203}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 35 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{545, 84.500228881835938}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 33 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{692.08170897171181, 216.16212434557292}, {80.236351013183594, 15}} - Class - ShapedGraphic - ID - 27 - Rotation - 23.090459823608398 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{752.09985610611375, 33}, {70, 39}} - Class - ShapedGraphic - ID - 30 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 1} - VerticalPad - 0 - - - - Bounds - {{515, 33}, {70, 39}} - Class - ShapedGraphic - ID - 28 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} 
-\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 0} - VerticalPad - 0 - - - - Bounds - {{782.099853515625, 84.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 1 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - GridInfo - - GuidesLocked - NO - GuidesVisible - YES - HPages - 2 - ImageCounter - 1 - KeepToScale - - Layers - - - Lock - NO - Name - Layer 1 - Print - YES - View - YES - - - LayoutInfo - - Animate - NO - circoMinDist - 18 - circoSeparation - 0.0 - layoutEngine - dot - neatoSeparation - 0.0 - twopiSeparation - 0.0 - - LinksVisible - NO - MagnetsVisible - NO - MasterSheets - - ModificationDate - 2014-02-21 22:11:34 +0000 - Modifier - Shamis, Pavel - NotesVisible - NO - Orientation - 2 - OriginVisible - NO - PageBreaks - YES - PrintInfo - - NSBottomMargin - - float - 41 - - NSHorizonalPagination - - coded - BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG - - NSLeftMargin - - float - 18 - - NSPaperSize - - size - {612, 792} - - NSPrintReverseOrientation - - int - 0 - - NSRightMargin - - float - 18 - - NSTopMargin - - float - 18 - - - PrintOnePage - - ReadOnly - NO - RowAlign - 1 - RowSpacing - 36 - SheetTitle - Canvas 1 - SmartAlignmentGuidesActive - YES - SmartDistanceGuidesActive - YES - UniqueID - 1 - UseEntirePage - - VPages - 1 - WindowInfo - - CurrentSheet - 0 - ExpandedCanvases - - - name - Canvas 1 - - - Frame - {{88, 76}, {1394, 1102}} - ListView - - OutlineWidth - 142 - RightSidebar - - ShowRuler - - Sidebar - - SidebarWidth - 120 - VisibleRegion - {{-54, -115}, {1259, 963}} - Zoom - 1 - ZoomValues - - - Canvas 1 - 1 - 1 - - - - - diff --git a/_deprecated_sources/diagrams/updated/fence.pdf b/_deprecated_sources/diagrams/updated/fence.pdf deleted file mode 100644 index eb8626c73..000000000 Binary files 
a/_deprecated_sources/diagrams/updated/fence.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/updated/mem_model.graffle b/_deprecated_sources/diagrams/updated/mem_model.graffle deleted file mode 100644 index d15066dfb..000000000 --- a/_deprecated_sources/diagrams/updated/mem_model.graffle +++ /dev/null @@ -1,880 +0,0 @@ - - - - - ActiveLayerIndex - 0 - ApplicationVersion - - com.omnigroup.OmniGraffle - 139.18.0.187838 - - AutoAdjust - - BackgroundGraphic - - Bounds - {{0, 0}, {1152, 733}} - Class - SolidGraphic - ID - 2 - Style - - shadow - - Draws - NO - - stroke - - Draws - NO - - - - BaseZoom - 0 - CanvasOrigin - {0, 0} - ColumnAlign - 1 - ColumnSpacing - 36 - CreationDate - 2014-02-07 20:20:54 +0000 - Creator - Shamis, Pavel - DisplayScale - 1 0/72 in = 1.0000 in - GraphDocumentVersion - 8 - GraphicsList - - - Bounds - {{345.46856480021546, -6.2657626102435415}, {117.30572509765625, 784.2421875}} - Class - ShapedGraphic - ID - 64 - Rotation - 269.79998779296875 - Shape - Rectangle - Style - - fill - - Draws - NO - - shadow - - Draws - NO - - stroke - - CornerRadius - 9 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs26 \cf0 Private Data Objects} - - TextPlacement - 0 - - - Bounds - {{427, 179}, {186.5, 23}} - Class - ShapedGraphic - ID - 63 - Shape - Rectangle - Style - - shadow - - Draws - NO - - stroke - - CornerRadius - 9 - Pattern - 24 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 X = shmem_malloc(sizeof(long))} - - TextPlacement - 0 - - - Bounds - 
{{661.92510691815664, 197.5}, {72.14975041597333, 23}} - Class - ShapedGraphic - ID - 55 - Shape - Rectangle - Style - - stroke - - Pattern - 24 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Variable: X } - - - - Bounds - {{305.92512580757329, 198.14959716796875}, {72.14975041597333, 23}} - Class - ShapedGraphic - ID - 56 - Shape - Rectangle - Style - - stroke - - Pattern - 24 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Variable: X } - - - - Bounds - {{116.92511720070233, 198.14959716796875}, {72.14975041597333, 23}} - Class - ShapedGraphic - ID - 57 - Shape - Rectangle - Style - - stroke - - Pattern - 24 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Variable: X } - - - - Bounds - {{91.000000000000057, 191}, {675, 37}} - Class - ShapedGraphic - ID - 58 - Shape - Rectangle - Style - - fill - - Draws - NO - - stroke - - CornerRadius - 9 - Pattern - 24 - - - TextPlacement - 0 - - - Bounds - {{293.73876111654664, -181.3356960694145}, {220.98025512695312, 783.66534423828125}} - Class - ShapedGraphic - ID - 35 - Rotation - 269.79998779296875 - Shape - Rectangle - Style - - fill - - Draws - NO - - shadow - - Draws - NO - - stroke - - CornerRadius - 9 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 
-\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs26 \cf0 Remotely Accessible Symmetric Data Objects} - - TextPlacement - 0 - - - Bounds - {{560, 248.5}, {23, 23}} - Class - ShapedGraphic - ID - 30 - Shape - Circle - Style - - Text - - VerticalPad - 0 - - - - Bounds - {{509, 248.5}, {23, 23}} - Class - ShapedGraphic - ID - 29 - Shape - Circle - Style - - Text - - VerticalPad - 0 - - - - Bounds - {{458, 248.5}, {23, 23}} - Class - ShapedGraphic - ID - 28 - Shape - Circle - Style - - Text - - VerticalPad - 0 - - - - Class - Group - Graphics - - - Bounds - {{269, 333.03007518797006}, {147, 105.71428571428574}} - Class - ShapedGraphic - ID - 37 - Shape - Rectangle - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Local Variables} - - - - Bounds - {{269, 180.57894736842124}, {147, 133.53383458646618}} - Class - ShapedGraphic - ID - 38 - Shape - Rectangle - Style - - fill - - Color - - b - 0.949358 - g - 0.949358 - r - 0.949358 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Symmetric Heap} - - - - Bounds - {{269, 109.36090225563905}, {147, 58.977443609022586}} - Class - ShapedGraphic - ID - 39 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} 
-{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Global and Static Variables} - - - - Bounds - {{258, 16.999999999999886}, {168, 444.00000000000023}} - Class - ShapedGraphic - ID - 40 - Shape - Rectangle - Style - - stroke - - CornerRadius - 9 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\b\fs40 \cf0 PE 1} - - TextPlacement - 0 - - - ID - 36 - - - Class - Group - Graphics - - - Bounds - {{80, 333.0300751879696}, {147, 105.71428571428572}} - Class - ShapedGraphic - ID - 42 - Shape - Rectangle - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Local Variables} - - - - Bounds - {{80, 180.57894736842147}, {147, 133.53383458646618}} - Class - ShapedGraphic - ID - 43 - Shape - Rectangle - Style - - fill - - Color - - b - 0.949358 - g - 0.949358 - r - 0.949358 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Symmetric Heap} - - - - Bounds - {{80, 109.36090225563876}, {147, 58.977443609022586}} - Class - ShapedGraphic - ID - 44 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 
-\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Global and Static Variables} - - - - Bounds - {{69, 16.999999999999858}, {168, 444.00000000000028}} - Class - ShapedGraphic - ID - 45 - Shape - Rectangle - Style - - stroke - - CornerRadius - 9 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\b\fs40 \cf0 PE 0} - - TextPlacement - 0 - - - ID - 41 - - - Class - Group - Graphics - - - Bounds - {{625, 333.03007518797006}, {147, 105.71428571428574}} - Class - ShapedGraphic - ID - 69 - Shape - Rectangle - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Local Variables} - - - - Bounds - {{625, 180.57894736842115}, {147, 133.53383458646621}} - Class - ShapedGraphic - ID - 68 - Shape - Rectangle - Style - - fill - - Color - - b - 0.949358 - g - 0.949358 - r - 0.949358 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Symmetric Heap} - - - - Bounds - {{625, 109.3609022556391}, {147, 58.977443609022579}} - Class - ShapedGraphic - ID - 67 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - 
{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs24 \cf0 Global and Static Variables} - - - - Bounds - {{614, 16.999999999999943}, {168, 444.00000000000011}} - Class - ShapedGraphic - ID - 66 - Shape - Rectangle - Style - - stroke - - CornerRadius - 9 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\b\fs40 \cf0 PE N-1} - - TextPlacement - 0 - - - ID - 65 - - - GridInfo - - GuidesLocked - NO - GuidesVisible - YES - HPages - 2 - ImageCounter - 1 - KeepToScale - - Layers - - - Lock - NO - Name - Layer 1 - Print - YES - View - YES - - - LayoutInfo - - Animate - NO - circoMinDist - 18 - circoSeparation - 0.0 - layoutEngine - dot - neatoSeparation - 0.0 - twopiSeparation - 0.0 - - LinksVisible - NO - MagnetsVisible - NO - MasterSheets - - ModificationDate - 2015-02-04 21:52:01 +0000 - Modifier - Shamis, Pavel - NotesVisible - NO - Orientation - 2 - OriginVisible - NO - PageBreaks - YES - PrintInfo - - NSBottomMargin - - float - 41 - - NSHorizonalPagination - - coded - BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG - - NSLeftMargin - - float - 18 - - NSPaperSize - - size - {612, 792} - - NSPrintReverseOrientation - - int - 0 - - NSRightMargin - - float - 18 - - NSTopMargin - - float - 18 - - - PrintOnePage - - ReadOnly - NO - RowAlign - 1 - RowSpacing - 36 - SheetTitle - Canvas 1 - SmartAlignmentGuidesActive - YES - SmartDistanceGuidesActive - YES - UniqueID - 1 - UseEntirePage - - VPages - 1 - WindowInfo - - CurrentSheet - 0 - ExpandedCanvases - - - name - Canvas 1 - - - Frame - 
{{317, 306}, {1286, 872}} - ListView - - OutlineWidth - 142 - RightSidebar - - ShowRuler - - Sidebar - - SidebarWidth - 120 - VisibleRegion - {{0, 0}, {1151, 733}} - Zoom - 1 - ZoomValues - - - Canvas 1 - 1 - 1 - - - - - diff --git a/_deprecated_sources/diagrams/updated/mem_model.pdf b/_deprecated_sources/diagrams/updated/mem_model.pdf deleted file mode 100644 index c7fe2cdf1..000000000 Binary files a/_deprecated_sources/diagrams/updated/mem_model.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/updated/quiet.graffle b/_deprecated_sources/diagrams/updated/quiet.graffle deleted file mode 100644 index d01f3f254..000000000 --- a/_deprecated_sources/diagrams/updated/quiet.graffle +++ /dev/null @@ -1,1309 +0,0 @@ - - - - - ActiveLayerIndex - 0 - ApplicationVersion - - com.omnigroup.OmniGraffle - 139.18.0.187838 - - AutoAdjust - - BackgroundGraphic - - Bounds - {{0, 0}, {1152, 733}} - Class - SolidGraphic - ID - 2 - Style - - shadow - - Draws - NO - - stroke - - Draws - NO - - - - BaseZoom - 0 - CanvasOrigin - {0, 0} - ColumnAlign - 1 - ColumnSpacing - 36 - CreationDate - 2014-02-04 18:37:20 +0000 - Creator - Pavel Shamis - DisplayScale - 1 0/72 in = 1.0000 in - GraphDocumentVersion - 8 - GraphicsList - - - Bounds - {{70.096904063309495, 405.61554727051521}, {76.921333312988281, 15}} - Class - ShapedGraphic - ID - 65 - Rotation - 153.57574462890625 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{70.121753001297776, 163.67627997882585}, {77.871635437011719, 15}} - Class - ShapedGraphic - ID - 54 - Rotation - 153.82313537597656 - Shape - 
AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Class - LineGraphic - Head - - ID - 45 - - ID - 77 - Points - - {16, 264} - {224, 264.21963500976562} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - - - Bounds - {{548.94970253341262, 123}, {198, 125.83084106445312}} - Class - ShapedGraphic - ID - 70 - Shape - Rectangle - Style - - fill - - Color - - b - 0.999991 - g - 0.999974 - r - 1 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 PE K is any PE in the system.\ -\ -value1, value2, and value3\ -are delivered to target PEs and visible for PE K after the shmem_quiet() call.} - - - - Bounds - {{45.999997409511252, 263.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 76 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{16, 11}, {70, 39}} - Class - ShapedGraphic - ID - 75 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} 
-\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 2} - VerticalPad - 0 - - - - Bounds - {{45.999997409511252, 62.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 74 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{765.69970444076125, 263.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 62 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{735.69970703125, 11}, {70, 39}} - Class - ShapedGraphic - ID - 61 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE K} - VerticalPad - 0 - - - - Bounds - {{765.69970444076125, 62.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 60 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{432.95552725825144, 360.83631934136469}, {79.163833618164062, 15}} - Class - ShapedGraphic - ID - 57 - Rotation - 29.242952346801758 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Class - LineGraphic - ID - 56 - Points - - 
{352.099853515625, 264.21963500976562} - {805.69970703125, 263} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - Tail - - ID - 45 - Info - 3 - - - - Bounds - {{429.58094966987062, 193.41368125253854}, {77.487899780273438, 15}} - Class - ShapedGraphic - ID - 55 - Rotation - 24.251314163208008 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{520.19970444076125, 263.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 53 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{283.099853515625, 420.54342651367188}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 52 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{156.54992416732375, 377.10272216796875}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 51 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - 
-\f0\fs32 \cf0 shmem_int_p (addr5, value5, PE 2)} - - - - Bounds - {{156.54992416732375, 333.66201782226562}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 50 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr4, value4, PE 0)} - - - - Bounds - {{283.099853515625, 290.557861328125}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 49 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{156.54992416732375, 172.49285888671875}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 48 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr3, value3, PE 0)} - - - - Bounds - {{156.54992416732375, 136.35284042358398}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 47 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} 
-\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr2, value2, PE 2)} - - - - Bounds - {{224, 250.38165283203125}, {128.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 45 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_quiet()} - - - - Bounds - {{156.54992416732375, 102.67642211914062}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 36 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr1, value1, PE 0)} - - - - Bounds - {{283.099853515625, 210.20545196533203}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 35 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{283.09985092513625, 62.500228881835938}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 33 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{432.56935019526316, 127.00000037163812}, {73.461151123046875, 15}} - Class - ShapedGraphic - ID - 27 - Rotation - 24.960399627685547 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - 
- fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{490.19970703125, 11}, {70, 39}} - Class - ShapedGraphic - ID - 30 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs28 \cf0 PE 1} - VerticalPad - 0 - - - - Bounds - {{253.09985092513625, 11}, {70, 39}} - Class - ShapedGraphic - ID - 28 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 0} - VerticalPad - 0 - - - - Bounds - {{520.19970444076125, 62.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 1 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - GridInfo - - GuidesLocked - NO - GuidesVisible - YES - HPages - 2 - ImageCounter - 1 - KeepToScale - - Layers - - - Lock - NO - Name - Layer 1 - Print - YES - View - YES - - - LayoutInfo - - Animate - NO - circoMinDist - 18 - circoSeparation - 0.0 - layoutEngine - dot - neatoSeparation - 0.0 - twopiSeparation - 0.0 - - LinksVisible - NO - MagnetsVisible - NO - MasterSheets - - ModificationDate - 2014-02-21 22:17:58 +0000 - Modifier - Shamis, Pavel - NotesVisible - NO - Orientation - 2 - OriginVisible - NO - PageBreaks - YES - PrintInfo - - 
NSBottomMargin - - float - 41 - - NSHorizonalPagination - - coded - BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG - - NSLeftMargin - - float - 18 - - NSPaperSize - - size - {612, 792} - - NSPrintReverseOrientation - - int - 0 - - NSRightMargin - - float - 18 - - NSTopMargin - - float - 18 - - - PrintOnePage - - ReadOnly - NO - RowAlign - 1 - RowSpacing - 36 - SheetTitle - Canvas 1 - SmartAlignmentGuidesActive - YES - SmartDistanceGuidesActive - YES - UniqueID - 1 - UseEntirePage - - VPages - 1 - WindowInfo - - CurrentSheet - 0 - ExpandedCanvases - - - name - Canvas 1 - - - Frame - {{108, 206}, {1434, 902}} - ListView - - OutlineWidth - 142 - RightSidebar - - Sidebar - - SidebarWidth - 120 - VisibleRegion - {{-81, -37}, {1314, 808}} - Zoom - 1 - ZoomValues - - - Canvas 1 - 1 - 1 - - - - - diff --git a/_deprecated_sources/diagrams/updated/quiet.pdf b/_deprecated_sources/diagrams/updated/quiet.pdf deleted file mode 100644 index 3f674f238..000000000 Binary files a/_deprecated_sources/diagrams/updated/quiet.pdf and /dev/null differ diff --git a/_deprecated_sources/diagrams/updated/wait.graffle b/_deprecated_sources/diagrams/updated/wait.graffle deleted file mode 100644 index ba0b04fdd..000000000 --- a/_deprecated_sources/diagrams/updated/wait.graffle +++ /dev/null @@ -1,685 +0,0 @@ - - - - - ActiveLayerIndex - 0 - ApplicationVersion - - com.omnigroup.OmniGraffle - 139.18.0.187838 - - AutoAdjust - - BackgroundGraphic - - Bounds - {{0, 0}, {1152, 733}} - Class - SolidGraphic - ID - 2 - Style - - shadow - - Draws - NO - - stroke - - Draws - NO - - - - BaseZoom - 0 - CanvasOrigin - {0, 0} - ColumnAlign - 1 - ColumnSpacing - 36 - CreationDate - 2014-02-04 15:42:44 +0000 - Creator - Pavel Shamis - DisplayScale - 1 0/72 in = 1.0000 in - GraphDocumentVersion - 8 - GraphicsList - - - Bounds - {{550, 231}, {201, 29}} - Class - ShapedGraphic - ID - 71 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 
0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 The -\i addr -\i0 is updated to -\i value} - - - - Bounds - {{576, 66}, {201, 58.824271203326333}} - Class - ShapedGraphic - ID - 70 - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 shmem_wait is a blocking operation therefore it waits until value in -\i addr -\i0 is updated} - - - - Bounds - {{306, 102}, {258.900146484375, 39}} - Class - ShapedGraphic - ID - 37 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;\f1\froman\fcharset0 Times-Roman;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_wait_until \ -(addr, _ -\f1 SHMEM_CMP_EQ -\f0 , value)} - - - - Bounds - {{25.5, 155.00045776367188}, {248, 27.67596435546875}} - Class - ShapedGraphic - ID - 36 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - 
{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr, value, PE 1)} - - - - Bounds - {{430.45007324218756, 288.82402801513672}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 35 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{430.45007324218756, 150.41201782226562}, {10, 82.17596435546875}} - Class - ShapedGraphic - ID - 34 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{144.5, 81.500228881835938}, {10, 64.683792114257812}} - Class - ShapedGraphic - ID - 33 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{332.50000000000006, 242}, {205.900146484375, 39}} - Class - ShapedGraphic - ID - 32 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_wait_until(...)\ -is -\b completed} - - - - Bounds - {{276.45874781096438, 184.00000413943079}, {88.682357788085938, 15}} - Class - ShapedGraphic - ID - 27 - Rotation - 31.245416641235352 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 
- r - 0 - - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{400.45007324218756, 30}, {70, 39}} - Class - ShapedGraphic - ID - 30 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 1} - VerticalPad - 0 - - - - Bounds - {{430.45007324218756, 81.500228881835938}, {10, 12.351699829101562}} - Class - ShapedGraphic - ID - 29 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{114.5, 30}, {70, 39}} - Class - ShapedGraphic - ID - 28 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 0} - VerticalPad - 0 - - - - Bounds - {{144.5, 191.49285888671875}, {10, 125.00713348388672}} - Class - ShapedGraphic - ID - 1 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - GridInfo - - GuidesLocked - NO - GuidesVisible - YES - HPages - 2 - ImageCounter - 1 - KeepToScale - - Layers - - - Lock - NO - Name - Layer 1 - Print - YES - View - YES - - - LayoutInfo - - Animate - NO - circoMinDist - 18 - circoSeparation - 0.0 - layoutEngine - dot - neatoSeparation - 0.0 - twopiSeparation - 0.0 - - LinksVisible - NO - MagnetsVisible - NO - MasterSheets - - ModificationDate - 2014-02-10 
21:26:55 +0000 - Modifier - Shamis, Pavel - NotesVisible - NO - Orientation - 2 - OriginVisible - NO - PageBreaks - YES - PrintInfo - - NSBottomMargin - - float - 41 - - NSHorizonalPagination - - coded - BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG - - NSLeftMargin - - float - 18 - - NSPaperSize - - size - {612, 792} - - NSPrintReverseOrientation - - int - 0 - - NSRightMargin - - float - 18 - - NSTopMargin - - float - 18 - - - PrintOnePage - - ReadOnly - NO - RowAlign - 1 - RowSpacing - 36 - SheetTitle - Canvas 1 - SmartAlignmentGuidesActive - YES - SmartDistanceGuidesActive - YES - UniqueID - 1 - UseEntirePage - - VPages - 1 - WindowInfo - - CurrentSheet - 0 - ExpandedCanvases - - - name - Canvas 1 - - - Frame - {{398, 136}, {1067, 872}} - ListView - - OutlineWidth - 142 - RightSidebar - - ShowRuler - - Sidebar - - SidebarWidth - 120 - VisibleRegion - {{0, 0}, {932, 733}} - Zoom - 1 - ZoomValues - - - Canvas 1 - 1 - 1 - - - - - diff --git a/_deprecated_sources/diagrams/updated/wait.pdf b/_deprecated_sources/diagrams/updated/wait.pdf deleted file mode 100644 index 6ece7fddc..000000000 Binary files a/_deprecated_sources/diagrams/updated/wait.pdf and /dev/null differ diff --git a/_deprecated_sources/main_spec.tex b/_deprecated_sources/main_spec.tex deleted file mode 100755 index 323873435..000000000 --- a/_deprecated_sources/main_spec.tex +++ /dev/null @@ -1,286 +0,0 @@ -\documentclass[10pt]{book} -\usepackage[letterpaper,top=2.5cm,bottom=2.5cm,left=2.5cm,right=2.5cm]{geometry} -\usepackage{makeidx} -\usepackage{graphicx} -\usepackage{multicol} -\usepackage[normalem]{ulem} -\usepackage{float} -\usepackage{listings} -\usepackage[usenames,dvipsnames]{color} -\usepackage{amsmath} -\usepackage{ifthen} -\usepackage[table]{xcolor} -\usepackage{textcomp} -\usepackage{alltt} -\usepackage{ifpdf} -\usepackage[UKenglish]{isodate} -\ifpdf -\usepackage[pdftex, - pagebackref=true, - colorlinks=true, - linkcolor=blue, - unicode - 
]{hyperref} -\else -\usepackage[ps2pdf, - pagebackref=true,chapter - colorlinks=true, - linkcolor=blue, - unicode - ]{hyperref} -\usepackage{pspicture} -\fi -\usepackage[utf8]{inputenc} -\usepackage{mathptmx} -\usepackage{sectsty} -\usepackage{mathptmx} -\usepackage[scaled=.90]{helvet} -\usepackage{courier} -\usepackage{sectsty} -\usepackage[titles]{tocloft} -\usepackage{prettyref} -\usepackage{mdwlist} -\usepackage{enumitem} -\usepackage{framed, color} %SP -\usepackage{pbox} %SP -\definecolor{shadecolor}{rgb}{0.92,0.92,0.92} - -\usepackage{draftcopy} -\usepackage{fancyhdr} -\usepackage{wrapfig} - -\usepackage[nolist]{acronym} - -\usepackage{caption} -\usepackage{subcaption} -%\usepackage[latin]{babel} %causing page headings "CONTENTS" to show as "INDEX". -%%%%%%%%%%%%%%%Borrowed from MPI Spec%%%%%%%% -\usepackage[usenames,dvipsnames]{color} -\definecolor{ListingBG}{rgb}{0.91,0.91,0.91} - - -\include{osh} - -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\setcounter{secnumdepth}{3} - -\makeindex -% Make content "clickable" -\begin{document} -\hypersetup{pageanchor=true,citecolor=blue} - -% Set Header -\pagestyle{fancy} -\fancyhead{} -\fancyhead[LE,LO]{\insertDocVersion} -%\fancyhead[CO,CE]{--- DRAFT ---} -\fancyfoot[CE,CO]{\thepage}%affects page numbering for the first pages, except the first ToC page -\pagenumbering{roman}%sets coverpage and toc page numbers to roman numerals -\include{coverpage} -\setcounter{tocdepth}{3} -\tableofcontents -\mainmatter % included for use of documenttype 'book' -%\pagestyle{headings} \withlinenumbers -\pagestyle{fancy} \withlinenumbers%replacing {headings} with {fancy} for customization -\fancyhf{} -\fancyhead[RE, LO]{\rightmark} -\fancyhead[RO, LE]{\thepage} -\renewcommand{\headrulewidth}{0pt} -\renewcommand{\thesection}{\arabic{section}} -{ %using setlength to force standardized spacing, if needed -%\setlength{\baselineskip}{3pt plus 3pt minus 3pt} -\setlength{\parskip}{3pt} %plus 3pt minus 9pt} -\startchap -\section*{Introduction} 
-\label{sec:intro} -%\input{INTRODUCTION/introduction} -%\input{INTRODUCTION/overview} -%\input{INTRODUCTION/introduction_pgas} -\input{INTRODUCTION/osh_project} -\label{subsec:osh_project} -\input{INTRODUCTION/programming_model} -\label{subsec:programming_model} -\input{INTRODUCTION/memory_model} -\label{subsec:memory_model} -\input{INTRODUCTION/new_execution_model} -%\input{INTRODUCTION/original_language_bindings} -\input{INTRODUCTION/language_bindings} %SP: Reverting to original text for now as per discussion on 02/04/2014 -\input{INTRODUCTION/library_constants.tex} -\label{subsec:library_constants} -\input{INTRODUCTION/environment_variables.tex} -\label{subsec:environment_variables} -\label{subsec:language_bindings} -%\input{INTRODUCTION/synchronization_model} %SP:Moving to remote memory routines section as per discussion on 1/21/14 - -\clearpage - -\startchap -\section{OpenSHMEM Library API} -\label{sec:openshmem_library_api} -\subsection{Library Setup, Exit, and Query Routines}%SP: Merging two routines -\input{INTRODUCTION/setup_query_intro} -\label{subsec:shmem_init} -\input{TEX_FILES/shmem_init.tex} %Swaroop, Graham -\input{TEX_FILES/NEW_shmem_my_pe.tex} %Tommy -\label{subsec:shmem_my_pe} -\input{TEX_FILES/NEW_shmem_num_pe.tex}%Tommy -\label{subsec:shmem_num_pe} -\input{TEX_FILES/shmem_finalize.tex} %Swaroop, Graham -\label{subsec:shmem_finalize} -\input{TEX_FILES/shmem_global_exit.tex} %Swaroop, Graham -\label{subsec:shmem_global_exit} %Still in review -\input{TEX_FILES/shmem_pe_accessible} %Oscar -\label{subsec:shmem_pe_accessible} -\input{TEX_FILES/shmem_addr_accessible}%Tommy -\label{subsec:shmem_addr_accessible} -\input{TEX_FILES/shmem_ptr.tex} %Oscar -\label{subsec:shmem_ptr} -\input{TEX_FILES/shmem_version.tex} %Manju -\label{subsec:shmem_version} -\input{TEX_FILES/shmem_name.tex} %Manju -\label{subsec:shmem_name} -\input{TEX_FILES/start_pes.tex} %Swaroop, Graham -\label{subsec:start_pes} -%\startchap -\subsection{Memory Management Routines} 
-\input{INTRODUCTION/memory_management_intro} -\input{TEX_FILES/shfree.tex}%Tommy -\label{subsec:shfree} -\input{TEX_FILES/shpalloc.tex} %Swaroop -\input{TEX_FILES/shpclmove.tex} %Swaroop -\input{TEX_FILES/shpdeallc.tex} %Swaroop -%\startchap -\input{INTRODUCTION/rma_intro.tex} -\label{subsec:shmem_put} -\input{TEX_FILES/shmem_put.tex} %Oscar -\label{subsec:shmem_p} -\input{TEX_FILES/shmem_p.tex} %Oscar -\input{TEX_FILES/shmem_iput.tex} %Oscar -\label{subsec:shmem_get} -\input{TEX_FILES/shmem_get.tex} %Manju -\label{subsec:shmem_g} -\input{TEX_FILES/shmem_g.tex} %Manju -\input{TEX_FILES/shmem_iget.tex} %Manju -%\startchap -\subsection{Atomic Memory Operations} -\input{INTRODUCTION/atomics_intro} -\label{subsec:shmem_add} -\input{TEX_FILES/shmem_add.tex}%Tommy -\label{subsec:shmem_cswap} -\input{TEX_FILES/shmem_cswap.tex} %Pasha -\label{subsec:shmem_swap} -\input{TEX_FILES/shmem_swap.tex} %Swaroop -\label{subsec:shmem_finc} -\input{TEX_FILES/shmem_finc.tex} %Manju -\label{subsec:shmem_inc} -\input{TEX_FILES/shmem_inc.tex} %Manju -\label{subsec:shmem_fadd} -\input{TEX_FILES/shmem_fadd.tex}%Pasha -%\startchap -\subsection{Collective Routines} -\label{subsec:coll} -\input{INTRODUCTION/collective_intro.tex} -\label{subsec:shmem_barrier_all} -\input{TEX_FILES/shmem_barrier_all.tex} %Pasha -\label{subsec:shmem_barrier} -\input{TEX_FILES/shmem_barrier.tex} %Tommy -\label{subsec:shmem_broadcast} -\input{TEX_FILES/shmem_broadcast.tex} %Pasha -\label{subsec:shmem_collect} %label in shmem_collect.tex, label from this location references page 1 -\input{TEX_FILES/shmem_collect.tex} %Pasha -\label{subsec:shmem_reductions} -\input{TEX_FILES/shmem_reductions.tex} %Swaroop -%\startchap -\subsection{Point-To-Point Synchronization Routines}%SP: Adding a chapter to include wait and wait until -\input{INTRODUCTION/synchronization_intro} %Swaroop -\input{TEX_FILES/shmem_wait.tex} %Swaroop -\label{subsec:shmem_wait} -%\startchap -\subsection{Memory Ordering Routines} %SP: Adding a chapter 
to include fence and quiet -\label{subsec:memory_order} -\input{INTRODUCTION/ordering_intro} %Swaroop -\label{subsec:shmem_fence} -\input{TEX_FILES/shmem_fence.tex} %Manju -\label{subsec:shmem_quiet} -\input{TEX_FILES/shmem_quiet.tex} %Swaroop -\input{INTRODUCTION/synchronization_model.tex} -%\startchap -\subsection{Distributed Locking Routines} -\input{INTRODUCTION/locks_intro} %Swaroop -\input{TEX_FILES/shmem_lock.tex} %Oscar -%\startchap -\subsection{Cache Management} -\input{INTRODUCTION/deprication.tex} %Pasha -\input{TEX_FILES/shmem_cache.tex} %Pasha - -\clearpage -%\startchap -%\subsection{Extensions Version 1.2} -%\color{red} -%\emph{Ticket \#107} -%\input{TEX_FILES/shmem_version.tex} -%\input{TEX_FILES/shmem_name.tex} -%\color{black} - -%\startchap -% -> Compilation -% -> Writing OpenSHMEM Programs -% -> Application written in Fortran -\appendix -%defining pagestyle for annex -%\pagestyle{plain} \withlinenumbers -\pagestyle{fancy} \withlinenumbers -\fancyhf{} -\fancyhead[RE, LO]{\leftmark} -\fancyhead[RO, LE]{\thepage} -\fancyfoot[CE,CO]{\thepage} -\renewcommand{\headrulewidth}{0pt} -%\fancyfoot[C]{\thepage}%no numbering appearing on non-title pages -\chapter{Writing \openshmem Programs} -\input{INTRODUCTION/writing_programs.tex} -\chapter{Compiling and Running Programs} -\input{INTRODUCTION/compile_exec_applications.tex} -\chapter{Undefined Behavior in \openshmem} -\label{sec:undefined} -\input{INTRODUCTION/undefined_behavior} -\chapter{Interoperability with other Programming Models} -\label{sec:mpi} -\input{INTRODUCTION/mpi_interoperability} -\clearpage -\chapter{History of \openshmem{}} -\input{INTRODUCTION/openshmem_history.tex} -\label{sec:openshmem_history} -\chapter{\openshmem Specification and Deprecated API } -\input{INTRODUCTION/deprecated_API.tex} -\label{sec:dep_api} -\chapter{Changes to this Document} -\input{INTRODUCTION/changelog1.2} -\input{INTRODUCTION/changelog} -%\clearpage -%\startchap -%\section{\openshmem{} program examples} 
-%\input{INTRODUCTION/examples} -} %end of setlength command -% Manju: Don't move the definitions to the front, it requires to be after content -% Add acronyms here -\begin{acronym} -\acro{RMA}{\emph{Remote Memory Access}} -\acro{RMO}{\emph{Remote Memory Operation}} -\acro{AMO}{\emph{Atomic Memory Operation}} -\acro{PE}{\emph{Processing Element}} -\acrodefplural{PE}[PEs]{\emph{Processing Elements}} -\acro{PGAS}{\emph{Partitioned Global Address Space}} -\acro{API}{\emph{Application Programming Interface}} -\acro{MPI}{\emph{Message Passing Interface}} -\acro{SPMD}{\emph{Single Program Multiple Data}} -\acro{UH}{University of Houston} -\acro{UO}{University of Oregon} -\acro{ORNL}{Oak Ridge National Laboratory} -\acro{LANL}{Los Alamos National Laboratory} -\acro{ESSC}{Extreme Scale Systems Center} -\acro{OSSS}{Open Software System Solutions} -\acro{DoD}{U.S. Department of Defense} -\end{acronym} - - -\end{document} diff --git a/_deprecated_sources/openshmem-book/Makefile b/_deprecated_sources/openshmem-book/Makefile deleted file mode 100644 index b6880c177..000000000 --- a/_deprecated_sources/openshmem-book/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -# -# Copyright (c) 2011, 2012 -# University of Houston System and Oak Ridge National Laboratory. -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# o Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# o Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# -# o Neither the name of the University of Houston System, Oak Ridge -# National Laboratory nor the names of its contributors may be used to -# endorse or promote products derived from this software without specific -# prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# - -MAIN_SOURCE = main - -LATEX = pdflatex -BIBTEX = bibtex - -LATEX_SOURCES = $(wildcard *.tex) - -.SUFFIXES: .pdf .tex - -$(MAIN_SOURCE).pdf: $(LATEX_SOURCES) book.bib - $(LATEX) $(MAIN_SOURCE) - $(BIBTEX) $(MAIN_SOURCE) - $(LATEX) $(MAIN_SOURCE) - $(LATEX) $(MAIN_SOURCE) - -.PHONY: all tidy clean - -all: $(MAIN_SOURCE).pdf - -tidy: - rm -f *.aux *.log *.toc *.out - rm -f *.bbl *.blg - rm -f *.lof *.lot *.lol *.tdo - -clean: tidy - rm -f *.pdf diff --git a/_deprecated_sources/openshmem-book/OpenSHMEM_Pound.png b/_deprecated_sources/openshmem-book/OpenSHMEM_Pound.png deleted file mode 100644 index 438e33d90..000000000 Binary files a/_deprecated_sources/openshmem-book/OpenSHMEM_Pound.png and /dev/null differ diff --git a/_deprecated_sources/openshmem-book/about.tex b/_deprecated_sources/openshmem-book/about.tex deleted file mode 100644 index e609e9a00..000000000 --- a/_deprecated_sources/openshmem-book/about.tex +++ /dev/null @@ -1,67 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. 
-% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - -\chapter{About This Book} - -Why this book exists, rationale for HPC and parallel computing, -high-level overview of SHMEM. - -Can talk about impact of computing on science and modern life. How -pushing boundaries means we need ever bigger and faster computing, and -also a way of harnessing huge machines. - -Introductory discussion of weather forecasting might be good here -since it is a topic that affects everyone and is easy to use to get a -handle on HPC. - -\section{How HPC is Used} - -Talk about how computing is used in science and other disciplines. -Give some examples of how computing is used in various ways in -different subjects. Could be all the way from embedded devices -(leverage TI experience) through traditional laptop/desktop use on to -tablet (ha ha) use and then through to HPC/cloud for imaging, -diagnosis. - -Challenges facing those subjects where HPC is concerned. What does -HPC enable is we go bigger and better with performance delivered? - -Need for HPC to solve those problems. - -\section{Addressing Challenges} - -Time to talk about really really big machines? History of ``-scale'' -suffix and timeline showing performance of top500 leaders. 
Current -state of play and hurdles to get to Exascale. diff --git a/_deprecated_sources/openshmem-book/api.tex b/_deprecated_sources/openshmem-book/api.tex deleted file mode 100644 index a2b583245..000000000 --- a/_deprecated_sources/openshmem-book/api.tex +++ /dev/null @@ -1,41 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-% - -\chapter{The \openshmem Application Programming Interface} - -Basically the routines and constants, plus commentary, like in the -specification. - -No point puling this in until we're near ready. diff --git a/_deprecated_sources/openshmem-book/authors.tex b/_deprecated_sources/openshmem-book/authors.tex deleted file mode 100644 index 900eb09ad..000000000 --- a/_deprecated_sources/openshmem-book/authors.tex +++ /dev/null @@ -1,43 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - -\chapter*{Authors} -\addcontentsline{toc}{chapter}{Authors} - -\section*{Tony Curtis} - -\section*{Barbara Chapman} - -\section*{Steve Poole} diff --git a/_deprecated_sources/openshmem-book/book.bib b/_deprecated_sources/openshmem-book/book.bib deleted file mode 100644 index 34c07408f..000000000 --- a/_deprecated_sources/openshmem-book/book.bib +++ /dev/null @@ -1,243 +0,0 @@ -@MISC{pgas:forum, - AUTHOR = "{PGAS Forum}", - TITLE = "{PGAS Forum}", - howpublished = "\url{http://www.pgas.org/}" -} - -@MANUAL{sgi:manpages, - AUTHOR = "{SGI, Inc.}", - TITLE = "{SHMEM API Man Pages}", - howpublished = "\url{http://docs.sgi.com/}" -} - -@ARTICLE{mpi:standard, - TITLE = "{A Message Passing Interface standard}", - JOURNAL = {International Journal of Supercomputer}, - VOLUME = {8}, - NUMBER = {3/4}, - PAGES = {159-416}, - MONTH = "June", - YEAR = {1994} -} - -@MANUAL{Cray:UnicosMP, - AUTHOR = "{Cray, Inc.}", - TITLE = "{Man Page Collection (Unicos MP): Shared Memory Access SHMEM}", - PUBLISHER = "{Cray, Inc.}" -} - -@MANUAL{Cray:UnicosLC, - AUTHOR = "{Cray, Inc.}", - TITLE = "{Man Page Collection (Unicos LC): Shared Memory Access SHMEM}", - PUBLISHER = "{Cray, Inc.}" -} - -@MANUAL{Quadrics:Shmem, - AUTHOR = "{Quadrics, Ltd.}", - TITLE = "{The SHMEM Programming Manual}" -} - -@ARTICLE{mpi2:standard, - TITLE = "{Message Passing Interface Forum: MPI2: A message passing interface standard}", - JOURNAL = {High Performance Computing Applications}, - 
VOLUME = {12}, - NUMBER = {1-2}, - PAGES = {1-299}, - YEAR = {1998} -} - -@BOOK{Quadrics:QsNet, - AUTHOR = "{Quadrics, Ltd.}", - TITLE = "{User Manual - Running Parallel Programs with RMS and QsNet}" -} - -@techreport{gasnet, - author = "Dan Bonachea", - title = "{GASNet specification, v1.1}", - institution = "Computer Science Department, University of California, Berkeley", - year = "2002" -} - -@techreport{chapel, - author = "{Cray, Inc.}", - title = "{Chapel Language Specification}", - institution = "{Cray, Inc.}", - year = "2010" -} - -@techreport{cray:t3dtechsummary, - author = "{Cray, Inc.}", - title = "{Cray T3D Technical Summary}", - institution = "{Cray, Inc.}", - year = "1993" -} - -@article{armci, - author = "J. Nieplocha and V. Tipparaju and M. Krishnan and D. Panda", - title = "{High Performance Remote Memory Access Comunications: The ARMCI Approach}", - journal = "International Journal of High Performance Computing and Applications", - volume = "20(2)", - pages = "233-253", - year = "2006" -} - -@MANUAL{cray:t3d, - AUTHOR = "{Cray, Inc.}", - TITLE = "{CRAY T3D System Architecture Overview Manual}" -} - -@MANUAL{quadrics:hp, - AUTHOR = "{Quadrics, Ltd.}", - TITLE = "{HP AlphaServer SC User Guide}" -} - -@MANUAL{sicortex:shmem, - AUTHOR = "{SiCortex}", - TITLE = "{SiCortex System Programming Guide}" -} - -@article{coarray, - author = "R.W. Numrich and J.K. 
Reid", - title = "{Co-Array Fortran for parallel programming}", - journal = "Fortran Forum", - volume = "17(2)", - year = "1998" -} - -@MANUAL{upc, - AUTHOR = "{S\'{e}bastien Chauvin, Proshanta Saha, Fran\c{c}ois Cantonnet, Smita Annareddy, Tarek El-Ghazawi}", - TITLE = "{UPC Manual}" -} - -@misc{sgi_tut_000, - title="{SHMEM: An Overview and Brief History}", - year="2010 SC10 OpenSHMEM Birds of a Feather Session", - author="Karl Feind" -} - -@misc{mailman, - title="{GNU MailMan: User Manual}", - author="GNU", - howpublished="\url{http://www.list.org/}" -} - -@misc{gcc, - title="{GNU Compiler Suite}", - author="GNU", - howpublished="\url{http://gcc.gnu.org/}" -} - -@misc{intelcomp, - title="{Intel Compiler Suite}", - author="{Intel, Inc.}", - howpublished="\url{http://software.intel.com/en-us/articles/intel-compilers/}" -} - -@misc{tlsf, - title="{Two Level Segregated Fit Memory Allocator}", - author="Matthew Conte", - howpublished="\url{http://tlsf.baisoku.org/}" -} - -@misc{valgrind, - title="{Valgrind Instrumentation Framework}", - author="Julian Seward et al.", - howpublished="\url{http://valgrind.org/}" -} - -@misc{dlmalloc, - title="{Doug Lea's Memory Allocator}", - author="Doug Lea", - howpublished="\url{http://g.oswego.edu/dl/html/malloc.html}" -} - -@misc{c99, - title="{C99 Language Standard}", - author="ISO", - howpublished="\url{http://en.wikipedia.org/wiki/C99}" -} - -@article{Mellor-Crummey:1991:ASS:103727.103729, - author = {Mellor-Crummey, John M. and Scott, Michael L.}, - title = {Algorithms for scalable synchronization on shared-memory multiprocessors}, - journal = {ACM Trans. Comput. 
Syst.}, - volume = {9}, - issue = {1}, - month = {February}, - year = {1991}, - issn = {0734-2071}, - pages = {21--65}, - numpages = {45}, - url = {http://doi.acm.org/10.1145/103727.103729}, - doi = {http://doi.acm.org/10.1145/103727.103729}, - acmid = {103729}, - publisher = {ACM}, - address = {New York, NY, USA}, -} - -@misc{zpl, - title="{The ZPL Programming Language}", - author="University of Washington", - howpublished="\url{http://www.cs.washington.edu/research/zpl/home/index.html}" -} - -@misc{hpf, - title="{The High Performance Fortran Programming Language}", - author="Rice University", - howpublished="\url{http://dacnet.rice.edu/}" -} - -@misc{mvapich2-x, - title="{MVAPICH2-X (Unified MPI+PGAS Communication Runtime over OpenFabrics/Gen2 for Exascale Systems)}", - author="Ohio State University", - howpublished="\url{http://mvapich.cse.ohio-state.edu/overview/mvapich2x/}" -} - -@misc{ProPack, - title="{ProPack Suite of Performance Optimization Libraries and Tools}", - author="SGI", - howpublished="\url{http://www.sgi.com/products/software/propack.html}" -} - -@misc{ASLR, - title="{Address Space Layout Randomization}", - author="Wikpedia", - howpublished="\url{http://en.wikipedia.org/wiki/Address_space_layout_randomization}" -} - - -@misc{Fortress, - title="{The Fortress Language}", - author="Oracle Inc.", - howpublished="\url{https://java.net/projects/projectfortress/pages/Home}", -} - -@misc{DARPA:HPCS, - title="{DARPA High Productivity Computing Systems}", - author="U.S. 
Gov.", - howpublished="\url{http://www.darpa.mil/Our_Work/MTO/Programs/High_Productivity_Computing_Systems_%28HPCS%29.aspx}" -} - -@misc{SPMD, - title="{Single Program, Multiple Data}", - author="Wikipedia", - howpublished="\url{https://en.wikipedia.org/wiki/SPMD}" -} - -@misc{NUMAlink, - title="{NUMAlink System Interconnect}", - author="SGI", - howpublished="\url{https://en.wikipedia.org/wiki/NUMAlink}" -} - -@misc{scalemp, - title="{ScaleMP Virtualization}", - author="ScaleMP", - howpublished="\url{http://www.scalemp.com/}" -} - -@misc{pkg-config, - title="{pkg-config tool}", - author="freedesktop.org", - howpublished="\url{http://www.freedesktop.org/wiki/Software/pkg-config/}" -} diff --git a/_deprecated_sources/openshmem-book/example.tex b/_deprecated_sources/openshmem-book/example.tex deleted file mode 100644 index 339763c8a..000000000 --- a/_deprecated_sources/openshmem-book/example.tex +++ /dev/null @@ -1,381 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. 
-% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - -\chapter{\openshmem By Example} -\label{chp:example} - -In this chapter we introduce the most used features of \openshmem -through example programs. - -\section{First Steps} - -\subsection{Sequential Code} - -Our first example will be the ubiquitous ``hello world'' program. -Here is the standard, sequential version in C that will act as our -template: - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{language=C}{programs/hello-seq.c} -\outputlisting{language=bash,caption={Expected Output}}{programs/hello-seq.output} -\vspace{0.1in} -\end{minipage} - -\subsection{Message Passing Interface} - -The Message Passing Interface (MPI) has become the \textit{de facto} -parallel and distributed ``go to'' paradigm. 
We will show the MPI -``hello world'' in listing~\ref{mpi-hello} as it is likely to be -familiar to many readers and can thus serve as a conceptual -launching-point: - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=mpi-hello,language=MPI+C}{programs/hello-mpi.c} -\outputlisting{language=bash,caption={Expected Output (4 - processors)}}{programs/hello-mpi.output} -\vspace{0.1in} -\end{minipage} - -Items of interest: - -\begin{itemize} -\item the lifetime of the MPI environment is enclosed by - initialization via \texttt{MPI\_Init()} (line 9) and finalization - via \texttt{MPI\_Finalize()} (line 13). -\item the MPI environment is queried (lines 10, 11) to discover how - many processors are taking part in this parallel program, and which - ``rank'' an individual execution has -\item the order in which lines of output appear will vary from run to - run because the constituent processes of the MPI program run - concurrently and not in sequence -\end{itemize} - -\subsection{\openshmem} - -Now let us have a look at an equivalent \openshmem program in -listing~\ref{openshmem-hello}: - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=openshmem-hello,language=OSH+C}{programs/hello-openshmem.c} -\outputlisting{language=bash,caption={Expected Output (4 - processors)}}{programs/hello-openshmem-c.output} -\vspace{0.1in} -\end{minipage} - -The programs are quite similar in their overall structure but here are -the main differences in the \openshmem version: - -\begin{itemize} -\item the initialization call \texttt{start\_pes()} (line 9) has a - single integer argument, 0, which is ignored~\footnote{The unused - argument is for compatibility with older SHMEM implementations.}. -\item there is no explicit finalize call, either a return from - \texttt{main()} (line 13) or an explicit \texttt{exit()} acts as an - implicit \openshmem finalization. 
-\item as in program ~\ref{mpi-hello} the order in which lines appear - in the output is not fixed. -\end{itemize} - -\openshmem also has a Fortran API, so for completeness we now give the -same program written in Fortran in listing~\ref{openshmem-hello-f90}: - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=openshmem-hello-f90,language=OSH+F}{programs/hello-openshmem.f90} -\outputlisting{language=bash,caption={Expected Output (4 - processors)}}{programs/hello-openshmem-f90.output} -\vspace{0.1in} -\end{minipage} - -\section{Communication} - -Of course, the whole point of MPI and \openshmem is communication. -\openshmem supports both point-to-point communication, in which one PE -exchanges data with another individual PE, and collective -communication, in which multiple PEs engage in a specified operation -as a group. - -\subsection{Point-to-Point Communication} -\label{sec:p-to-p} - -Let us now have a look at a very simple \openshmem example in -listing~\ref{rotput} in which each PE sends a value to the PE above it -(so 0 sends to 1, 1 to 2, 2 to 3, and so on until the last PE wraps -around back to PE 0). - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=rotput,language=OSH+C}{programs/rotate-put.c} -\outputlisting{language=bash,caption={Expected Output (4 - processors)}}{programs/rotate-put.output} -\vspace{0.1in} -\end{minipage} - -Points to notice: - -\begin{itemize} -\item the target variable ``d'' (line 5) is a global variable to make - it remotely accessible -\item we use a short form of the ``put'' call for a single value (line - 22) -\item there is a global barrier \emph{after} the ``put'' (line 24): - there is no need to have a global barrier \emph{before} the ``put'' - because only the local value needs to be ready, and the global - variable is pre-allocated in the executable. 
-\end{itemize} - -\subsection{Collective Communication} - -Here is an example of \openshmem's broadcast: this is a ``1-to-others'' -broadcast in which one designated ``root'' PE sends data to the other -PEs in the ``active set''. The root PE does not update data on -itself, though. - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{language=OSH+C}{programs/broadcast.c} -\outputlisting{language=bash,caption={Expected Output (3 processors, - output sorted for clarity)}}{programs/broadcast.output} -\vspace{0.1in} -\end{minipage} - -Points to notice: - -\begin{itemize} -\item collective routines use a symmetric synchronization variable - (defined on line 6) \todo{cross-ref with more detailed discussion of - synch. vars} -\item the synchronization variable must be initialized \emph{on all - PEs} before use (lines 28--31) -\item \texttt{shmem\_barrier\_all} (line 32) ensures the data and the - synchronization variable are initialized \emph{everywhere} before - proceeding with the collective routine. -\item the size of the data updated depends on the number of PEs in the - program overall, so we dynamically allocate symmetric memory (lines - 18--27) (checking the return value of \texttt{shmalloc} omitted for - brevity) -\item dynamically allocated memory should be released when not needed - any more (lines 42--43) -\end{itemize} - -\section{Atomic Operations} - -\subsection{Arithmetic} - -There are 2 kinds of atomic arithmetic operations in \openshmem. In -both cases, one PE initiates an atomic change to a variable on another -PE. 
- -\begin{description} - \item[{remote increment/addition}] the remote variable is updated - \item[{fetch \& remote increment/addition}] the remote variable is - updated, and the \emph{previous} value of the remote variable is - returned to the caller -\end{description} - -\subsubsection{Addition} - -Here is an example of an atomic addition: the program in listing~\ref{addprog} -computes the sum from $1$ to $N-1$: - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=addprog,language=OSH+C}{programs/add.c} -\outputlisting{language=bash,caption={Expected Output (4 processors)}}{programs/add.output} -\vspace{0.1in} -\end{minipage} - -Points to notice: - -\begin{itemize} -\item the counter is initialized to 0 via the global declaration (line - 5) -\item all PEs $> 0$ add their PE number \emph{atomically} to the - variable ``counter'' on PE 0 (line 17) -\item the order in which the additions happen is unknown in advance, - but the atomic guarantee means that 2 PEs cannot interfere with each - other's updates -\end{itemize} - -\subsubsection{Increment} - -The increment operation in listing~\ref{incprog} is a special case of -addition. The program simply counts the number of $PEs - 1$. - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=incprog,language=OSH+C}{programs/inc.c} -\outputlisting{language=bash,caption={Expected Output (4 processors)}}{programs/inc.output} -\vspace{0.1in} -\end{minipage} - -Points to notice: - -\begin{itemize} -\item the increment call does not need to pass a value to be added - since it is 1 by definition (line 17) -\end{itemize} -\subsubsection{Fetch and Add} - -The fetch-and-add call in listing~\ref{faddprog} adds a value to a -target variable and returns the previous value of that variable to the -caller PE. 
- -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=faddprog,language=OSH+C}{programs/fadd.c} -\outputlisting{language=bash,caption={Expected Output (2 processors)}}{programs/fadd.output} -\vspace{0.1in} -\end{minipage} - -\subsubsection{Fetch and Increment} - -The fetch-and-increment operation in listing~\ref{fincprog} is a -special case of fetch-and-addition. - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=fincprog,language=OSH+C}{programs/finc.c} -\outputlisting{language=bash,caption={Expected Output (2 processors)}}{programs/finc.output} -\vspace{0.1in} -\end{minipage} - -\subsection{Swaps} - -\subsubsection{Unconditional} - -An \openshmem swap atomically writes a new value from the caller PE -into a variable on the target PE, and returns the old value to the -caller. - -\todo{NEED swap EXAMPLE} - -\subsubsection{Conditional} - -An \openshmem conditional swap atomically writes a new value from the -caller PE into a variable on the target PE, but only if a condition -value matches. In either case, the old value of the target variable -is returned to the caller. - -\todo{NEED cswap EXAMPLE} - -\section{Locks} - -Locks in \openshmem are \emph{distributed}. A lock is built on a -symmetric variable whose value is managed by PEs through the API -below. The \openshmem lock is effectively a mutual-exclusion device -that allows precisely \emph{one} PE access to a code region at a time. - -\subsection{Set Lock} - -The - -\begin{verbatim} -shmem_set_lock (long *L) -\end{verbatim} - -% \apilisting{shmem\_set\_lock (long *L)} - -call allows the calling PE to ``grab'' a symmetric variable and to -then block access to a code region until or unless the lock is cleared -(v.i.). - -\texttt{shmem\_set\_lock} returns, and the calling PE proceeds, only -when no other PE has locked the named symmetric variable. - -The lock variable must be initialized to ``0'' before first use. 
-After that, the lock API calls must be used to manage the variable. - - -\subsection{Clear Lock} - -A PE that holds a lock on the symmetric variable ``L'' can release -that lock by calling - -\begin{verbatim} -shmem_clear_lock (long *L) -\end{verbatim} - -Before release, all pending \openshmem communications initiated in the -locked region are completed. - -\subsection{Test Lock} - -A PE can check whether a lock is available by calling - -\begin{verbatim} -shmem_test_lock (long *L) -\end{verbatim} - -If the lock on ``L'' is currently held by another PE, -\texttt{shmem\_test\_lock} returns 1 (true) immediately and does not -set the lock. - -If the lock on ``L'' is currently clear, \texttt{shmem\_test\_lock} -claims the lock and returns 0 (false). - -\subsection{Use of Locks} - -% \todo{want a good example of where locks would be useful. -% E.g.\ managing a distributed data structure of some kind} - -Consider a program that implements a distributed data structure: each -PE manages part of the overall structure and any PE can update values -on other PEs. An example of such a structure is a distributed hash -table. In such a table, a hash function maps keys to a particular -table index, where associated values are then stored (maybe as a -linked list or tree). - -When updates are performed, the presence of multiple writers -(i.e.\ multiple PEs that could be updating entries arbitrarily at the -same time) means that 2 PEs could collide and corrupt the table by -writing new data on top of each other. - -Locks in \openshmem provide a way for the PE that is performing the -update to indicate that it ``owns'' that table entry on the other PE -and has exclusive access to write (and/or read) the locked index until -it releases the lock. - -\subsection{Use of Ordering} - -Talk about fence and quiet for sentinels that flag arrival of data. -There's an example of quiet in the man pages that should be expanded. 
- -Think this is the right place to discuss ordering since it has a -similar purpose to locks, namely putting a boundary around a region -with a separate advisory variable. diff --git a/_deprecated_sources/openshmem-book/foreword.tex b/_deprecated_sources/openshmem-book/foreword.tex deleted file mode 100644 index e58333316..000000000 --- a/_deprecated_sources/openshmem-book/foreword.tex +++ /dev/null @@ -1,39 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - -\chapter*{Foreword} -\addcontentsline{toc}{chapter}{Foreword} - -Somebody famous tells us how important this book is. diff --git a/_deprecated_sources/openshmem-book/future.tex b/_deprecated_sources/openshmem-book/future.tex deleted file mode 100644 index 24713f2e8..000000000 --- a/_deprecated_sources/openshmem-book/future.tex +++ /dev/null @@ -1,158 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. 
-% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - - -\chapter{The Future of the \openshmem Specification} - -Presenting ideas for extending or enhancing the API. A look at -future technology considerations, e.g.\ how exascale might impact -design decisions. - -We have a number of examples of proposed extensions in the SoW. - -\section{non-blocking puts and gets} - -put/get routines that return per-communication handles to the -caller. The handles can be tested later for completion (present in -Cray and Quadrics SHMEMs, for example) - -\section{split-phase collectives} - -e.g.\ split-phase shmalloc, barriers, broadcasts - -\section{locality} - -exposing information about node and cluster topology to the library -and/or its API - -\section{regularized namespace} - -currently routines are a mixture of ``shmem\_'' prefixes, -``start\_pes'', ``\_my\_pe'' and others. 
Providing an API with a -consistent naming scheme would be useful, e.g.\ - -\begin{lstlisting}[language=OSH2+C] -shmem_init (int *argc, char ***argv) - -shmem_finalize () - -shmem_malloc () - -shmem_my_pe () -\end{lstlisting} - -\section{Fortran module, C++ namespace} - -provide better language support - -\section{complex numbers} - -The C++ interface is basically the C one. There is one point of -contention, namely complex numbers. The SGI documentation refers only -to the use of C99 ``complex'' modifiers, not to C++'s -\texttt{complex}. The use of complex number routines (e.g.\ -reductions) in C++ is thus not clearly specified. - -For example, \texttt{shmem\_sum\_to\_all} has a C variant - -\begin{lstlisting}[language=OSH+C] -void shmem_complexd_sum_to_all (double complex *target, - double complex *source, - ...) -\end{lstlisting} - -but in C++, complex doubles would be represented as - -\begin{lstlisting}[language=OSH+C] -void shmem_complexd_sum_to_all (complex<double> *target, - complex<double> *source, - ...) -\end{lstlisting} - -\section{User-defined reductions} - -\section{thread-safety} - -providing thread-safe SHMEM routines that can operate in hybrid -threaded environments, e.g. alongside OpenMP; - -\section{fault-tolerance} - -\openshmem currently operates under the assumption that there is a -fixed set of PEs available at launch-time and that these remain -accessible for the entire life-time of the program run. Allowing -\openshmem to handle system failures such as nodes becoming -unavailable makes it more attractive for large-scale systems. - -2 routines, \texttt{shmem\_pe\_accessible} and -\texttt{shmem\_addr\_accessible} are available in the SGI API. These -are intended for determining the role of PEs in SGI's particular -environment, which can have mixed-mode MPI and SHMEM programs. 
-\texttt{shmem\_pe\_accessible} tests whether a given rank of the -program is running MPI or SHMEM, and \texttt{shmem\_addr\_accessible} -tests whether a remote address can be reached as a SHMEM PE rather -than an MPI rank. - -However, these, or similar, routines could be retained for -fault-tolerance purposes. - -\section{Return Types} - -Alongside fault-tolerance, many of the routines in the current API are -``void'', that is, they return no value to the calling program. -\texttt{start\_pes} is one example. Adding return status information -would be useful to allow the program to gracefully trap errors or -other unexpected circumstances, e.g. - -\begin{lstlisting}[language=OSH2+C] -int -main (int argc, char *argv[]) -{ - int s = shmem_init (&argc, &argv); - - if (s != 0) - { - fprintf (stderr, ..., shmem_error (s), ...); - } - - ... -} -\end{lstlisting} - -\section{tools} - -that help programming in PGAS environments - -\section{where is PGAS going in general?} diff --git a/_deprecated_sources/openshmem-book/introduction.tex b/_deprecated_sources/openshmem-book/introduction.tex deleted file mode 100644 index 829ba12eb..000000000 --- a/_deprecated_sources/openshmem-book/introduction.tex +++ /dev/null @@ -1,261 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. 
-% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - -\chapter{Introduction to \openshmem} - -This is where we talk about all that history and PGAS stuff. - -\section{What is Parallel Programming?} - -Maybe a little history of sequential programming, and then talk about -concurrency vs.\ parallelism. - -\section{Parallel Programming Models} - -Shared-memory, vs.\ distributed: data- vs.\ task-centric. - -MPMD and SPMD~\cite{SPMD}. - -\section{What is a Global Address Space?} - -One notional memory that covers the entire extent of the program, even -across separate machines in a cluster. Could talk about NUMAlink~\cite{NUMAlink}, -ScaleMP~\cite{scalemp} \& virtualization technologies. - -\section{What is a Partitioned Global Address Space?} - -Contrast with GAS, concentrate on private and shared memory regions. - -Talk about PGAS languages and libraries with some examples. We've mentioned -them above but here we want to get into some detail. 
- -\subsection{Unified Parallel C} - -Unified Parallel C~\cite{upc} (UPC) is an extension to the well-known -C language. UPC has a single, shared memory space in which variables -``belong'' to a particular processor in an SPMD environment. - -\subsection{Co-Array Fortran} - -Co-Array Fortran~\cite{coarray} (CAF) is an extension to standard -Fortran that allows access to data stored in other \emph{places}. CAF -is now part of the Fortran 2008 specification. - -CAF partitions its data spaces into \emph{images} (analogous to MPI's -``ranks'' and \openshmem{}'s Processing Elements) numbered from $1..N$ in -keeping with Fortran's default style of array indexing. -Programs run in the SPMD style and access variables on images with -the bracket syntax, e.g.\ A(1)[2] is a reference to element 1 of array -``A'' on image 2. - -CAF preserves sequential semantics on such assignments, so -opportunities for overlap are minimal within the application source -code. Compiler optimization techniques such as code motion, hoisting -and loop fission/fusion will be required to analyze the code and -rearrange remote accesses by the compiler's runtime. - -\todo{References to compiler optimizations} - -\subsection{DARPA HPCS Languages} - -The DARPA HPCS program~\cite{DARPA:HPCS} was set up to develop tools to -support multi-petaflop systems. Languages developed under this -program include: - -\subsubsection{Chapel} - -Chapel~\cite{chapel} is a high-level language developed by Cray Inc. -It is a new language that draws primarily from earlier languages -ZPL~\cite{zpl} and HPF~\cite{hpf}. Chapel supports a multithreaded -\emph{global view} model that allows programmers to reason about -distributed data structures in a natural way. - -\subsubsection{X10} - -X10 is a language developed by IBM. Blah blah. The language is -implicitly parallel and object-oriented, and looks similar to Java or -Scala. - -\subsubsection{Fortress} - -Fortress~\cite{Fortress} is a language designed by Sun Microsystems. 
-It provides implicit parallelism with a syntax reminiscent of Haskell -and ML. Fortress was developed as part of the DARPA HPCS program, but -was not selected to continue in that program. The Fortress project -has since been closed and the language is no longer under development. - -\todo{citations} - -\section{History of SHMEM} - -SHMEM~\cite{sgi_tut_000} has a long history as a parallel programming -model, having been used extensively on a number of products since -1993, including Cray T3D, Cray X1E, the Cray XT3/4, SGI Origin, SGI -Altix, clusters based on the Quadrics interconnect, and to a very -limited extent, Infiniband based clusters. - -\begin{itemize} -\item A SHMEM Timeline - \begin{itemize} - \item Cray SHMEM - \begin{itemize} - \item SHMEM first introduced by Cray Research Inc. in 1993 for Cray T3D - \item Cray is acquired by SGI in 1996 - \item Cray is acquired by Tera in 2000 (MTA) - \item Platforms: Cray T3D, T3E, C90, J90, SV1, SV2, X1, X2, XE, XMT, XT - \end{itemize} - \item SGI SHMEM - \begin{itemize} - \item SGI purchases Cray Research Inc. and SHMEM was integrated into - SGI's Message Passing Toolkit (MPT) - \item SGI currently owns the rights to SHMEM and \openshmem - \item Platforms: Origin, Altix 4700, Altix XE, Altix ICE, Altix UV - \item SGI was purchased by Rackable Systems in 2009 - \item SGI and Open Source Software Solutions, Inc. (OSSS) signed a - SHMEM trademark licensing agreement, in 2010 - \end{itemize} - \item Other Implementations - \begin{itemize} - \item Quadrics (Vega UK, Ltd.) - \item Hewlett Packard - \item GPSHMEM - \item IBM - \item QLogic - \item Mellanox - \item University of Houston - \item University of Florida - \end{itemize} - \end{itemize} -\end{itemize} - -Despite being supported by a variety of vendors there is no standard -defining the SHMEM memory model or programming interface. 
Consistencies -(where they exist) and extensions across the various implementations have -been driven by the needs of an enthusiastic user community. The lack of a -SHMEM standard has allowed each implementation to differ in both interface -and semantics from vendor to vendor and even product line to product line, -which has to this point limited broader acceptance. - -\section{Towards an \openshmem standard} - -When comparing the various implementations of SHMEM and other RMA APIs, -we see that semantics vary dramatically. Not only do SHMEM RMA semantics -differ from other RMA implementations (as expected) but different SHMEM -implementations differ from each other. Support for primitive data types -varies. Discontiguous RMA operations and Atomic RMA operations are not -uniformly supported. Synchronization and completion semantics differ -substantially which can cause valid programs on one architecture to be -completely invalid on another. Symmetric data objects that dramatically -aid the programmer are unique to the SHMEM model but are not uniformly -supported. There are a number of capabilities that are available from -implementations that differ from the SGI SHMEM base-line version. Below -is a brief list of potential future additions to \openshmem: - -\begin{itemize} - \item Support for Multiple Host Channel Adapters (Rails) - \item Support for non-Blocking Transfers - \item Support for Events - \item Additional Atomic Memory Operations and Collectives - \item Locks - \begin{itemize} - \item shmem\_set\_lock - \item shmem\_clear\_lock - \item shmem\_test\_lock - \end{itemize} - \item Potential options for NUMA/Hybrid architectures - \item Additional communications transport mechanisms - \item \openshmem I/O library enhancements - \item \openshmem tools and Compiler enhancements - \item Enabling exa-scale applications -\end{itemize} - -An \openshmem standard can address the lack of uniformity across the -available SHMEM implementations. 
Standardization levels can be established -to provide a base level that all SHMEM implementations must support in -order to meet the standard, with higher levels available for additional -functionality and platform specific features. \\ - -In 2008, an initial dialogue was started between SGI and a small not-for-profit -company called Open Source Software Solutions, Inc. (OSSS). -The purpose of the dialogue was to determine if an agreement could be reached -for SGI's approval of an open source version of SHMEM that would serve -as an umbrella standard under which to unify all of the already existing -implementations into one cohesive API. As part of this discussion, OSSS -held a BOF on \openshmem at SC08 to discuss this plan. The BOF was well -attended by all of the vendors interested in supporting \openshmem/SHMEM, -and many of the interested parties in the SHMEM user community. The -unanimous opinion of the attendees favored continuing the process toward -developing \openshmem as a community standard. The final agreement -between SGI and OSSS was signed in 2010. The agreement allows OSSS to -use the name \openshmem and directly reference SHMEM going forward. The -base version of \openshmem V1.0 is based on the SGI man pages. The ``look -and feel'' of \openshmem needs to preserve the original ``look and feel'' -of the SGI SHMEM. \openshmem version 1.0 has been released and input -for version 2.0 is being actively solicited. - -There are a number of enhancements under discussion for version 2.0 and -future offerings. Some of the features specific to one implementation -or another as listed in the preceding tables and elements will be -incorporated into later versions of \openshmem. \openshmem will be -supported on a variety of commodity networks (including Infiniband -from both Mellanox and QLogic) as well as several proprietary -networks (Cray, SGI, HP, and IBM). 
- -The \openshmem mail reflector is hosted -at ORNL and can be joined by sending a request via the \openshmem list -server (GNU MailMan~\cite{mailman}) at -\begin{verbatim} -openshmem-join@email.ornl.gov -\end{verbatim} -Future enhancements and RFIs will be sent to developers and other interested -parties via this mechanism. - -Source code examples, Validation and -Verification suites, performance analysis and \openshmem compliance are -hosted at the \openshmem website at -\begin{verbatim} -http://www.openshmem.org/ -\end{verbatim} -and the \openshmem standard is owned and maintained by OSSS. - -With the inexorable march toward exa-scale, programming methodologies such -as \openshmem will certainly find their place in enabling extreme scale -architectures. By virtue of decoupling data motion from synchronization, -\openshmem exposes the potential for synergistic application improvements -by scaling more readily than two-sided models, and by minimizing data -motion, thus affording the possibility of concomitant savings in power -consumption by applications. These gains, along with portability, -programmability, productivity, and adoption, will secure \openshmem a -place for future extreme scale systems. diff --git a/_deprecated_sources/openshmem-book/main.tex b/_deprecated_sources/openshmem-book/main.tex deleted file mode 100644 index 5d92cee64..000000000 --- a/_deprecated_sources/openshmem-book/main.tex +++ /dev/null @@ -1,265 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. 
-% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-% - -\documentclass[oneside,english]{book} -\usepackage[T1]{fontenc} -\usepackage[latin9]{inputenc} -\setcounter{secnumdepth}{3} -\setcounter{tocdepth}{3} -\usepackage{babel} - -\usepackage[pdftex]{graphicx} -\usepackage[unicode=true]{hyperref} - -\usepackage[parfill]{parskip} % gaps between paragraphs - -\usepackage{mathtools} - -\usepackage{fancyhdr} -\pagestyle{fancy} -\fancyhead{} -\fancyfoot{} -\fancyhead[LO,LE]{\leftmark} -\fancyfoot[LO,LE]{DRAFT} -\fancyfoot[RO, LE]{\thepage} - -\usepackage[usenames,dvipsnames]{color} -\definecolor{ListingBG}{rgb}{0.91,0.91,0.91} -\usepackage{courier} - - -% -% in case SGI want us to put (tm) on all OpenSHMEM occurrences -% -% don't insert space if apostrophe or similar follows -\usepackage{xspace} -% -% \newcommand{\openshmem} {OpenSHMEM\xspace\textsuperscript{{\small \texttrademark}}} -\newcommand{\openshmem} {\mbox{OpenSHMEM}\xspace} - -\usepackage{listings} - -% these are the common listing settings - -\def\StandardListing { - \lstset { - basicstyle=\scriptsize\ttfamily, - backgroundcolor=\color{ListingBG}, - showspaces=false, - showstringspaces=false, - showtabs=false, - frame=tlBR, - frameround=tttt, - numbers=none, - caption=\lstname - } -} - -% annotated program source should be line numbered though - -\def\ProgramNumberedListing { - \StandardListing - \lstset { - numbers=left, - numberstyle=\footnotesize - } -} - -% new command to show program listings - -\newcommand{\numberedlisting}[2] { - \ProgramNumberedListing - \lstinputlisting[#1]{#2} - \StandardListing -} - -% new command to show program output or similar things - -\newcommand{\outputlisting}[2] { -\begin{minipage}{\linewidth} -\vspace{0.1in} - \lstinputlisting[#1]{#2} - \StandardListing -\vspace{0.1in} -\end{minipage} -} - -\lstdefinelanguage{MPI+C}[]{C}{ - classoffset=1, - morekeywords={ - MPI_Init, - MPI_Comm_size, MPI_Comm_rank, - MPI_Finalize - }, - keywordstyle=\color{orange}\textbf, - classoffset=0, - sensitive=true -} - 
-\lstdefinelanguage{OSH+C}[]{C}{ - classoffset=1, - morekeywords={ - _SHMEM_BCAST_SYNC_SIZE, _SHMEM_SYNC_VALUE, - start_pes, - my_pe, _my_pe, shmem_my_pe, - num_pes, _num_pes, shmem_n_pes, - shmem_int_p, shmem_short_p, shmem_long_p, - shmem_int_put, shmem_short_put, shmem_long_put, - shmem_barrier_all, shmem_barrier, - shmalloc, shfree, shrealloc, - shmem_broadcast32, shmem_broadcast64, - shmem_short_inc, shmem_int_inc, shmem_long_inc, - shmem_short_add, shmem_int_add, shmem_long_add, - shmem_short_finc, shmem_int_finc, shmem_long_finc, - shmem_short_fadd, shmem_int_fadd, shmem_long_fadd, - shmem_set_lock, shmem_test_lock, shmem_clear_lock, - shmem_long_sum_to_all, - shmem_complexd_sum_to_all, - }, - keywordstyle=\color{blue}\textbf, - classoffset=0, - sensitive=true -} - -\lstdefinelanguage{OSH2+C}[]{OSH+C}{ - classoffset=1, - morekeywords={ - shmem_init, - shmem_finalize, - shmem_malloc, - shmem_my_pe, - shmem_error, - }, - keywordstyle=\color{blue}\textbf, - classoffset=0, - sensitive=true -} - -\lstdefinelanguage{OSH+F}[]{Fortran}{ - classoffset=1, - morekeywords={ - SHMEM_BCAST_SYNC_SIZE, SHMEM_SYNC_VALUE, - start_pes, - my_pe, shmem_my_pe, - num_pes, shmem_n_pes, - shmem_int_p, shmem_short_p, shmem_long_p, - shmem_int_put, shmem_short_put, shmem_long_put, - shmem_barrier_all, shmem_barrier, - shpalloc, shpdeallc, shpclmove, - shmem_broadcast32, shmem_broadcast64, - shmem_broadcast4, shmem_broadcast8, - shmem_short_inc, shmem_int_inc, shmem_long_inc, - shmem_short_add, shmem_int_add, shmem_long_add, - shmem_short_finc, shmem_int_finc, shmem_long_finc, - shmem_short_fadd, shmem_int_fadd, shmem_long_fadd, - shmem_set_lock, shmem_test_lock, shmem_clear_lock, - shmem_long_sum_to_all, - }, - keywordstyle=\color{blue}\textbf, - classoffset=0, - sensitive=false -} - -\newcommand{\HRule}{\rule{\linewidth}{0.5mm}} - -\usepackage{todonotes} - -\usepackage{subfig} -\captionsetup{belowskip=10pt} - -\usepackage[nottoc]{tocbibind} - -% 
-------------------------------------------------------------------------- - -\begin{document} - -% make sure the lstlisting gets set up with defaults -\StandardListing - -\include{titlepage} - -\tableofcontents - -\pagebreak - -\listoftodos -\addcontentsline{toc}{chapter}{List of Todos} - -\listoffigures -\addcontentsline{toc}{chapter}{List of Figures} - -\listoftables -\addcontentsline{toc}{chapter}{List of Tables} - -\lstlistoflistings -\renewcommand{\lstlistoflistings}{\listoflistings} -\addcontentsline{toc}{chapter}{List of Programs} - -\include{authors} - -\include{thankyou} - -\include{foreword} - -\include{about} - -\include{introduction} - -\include{nutshell} - -\include{example} - -\include{api} - -\include{programming-issues} - -\include{reference-implementation} - -\include{other-openshmems} - -\include{v&v} - -\include{performance} - -\include{openshmem-in-practice} - -\include{future} - - -\nocite{*} -\bibliographystyle{amsplain} -\bibliography{book} - -\end{document} diff --git a/_deprecated_sources/openshmem-book/nutshell.tex b/_deprecated_sources/openshmem-book/nutshell.tex deleted file mode 100644 index cda9d74ab..000000000 --- a/_deprecated_sources/openshmem-book/nutshell.tex +++ /dev/null @@ -1,475 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. 
-% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - - -\chapter{\openshmem in a Nutshell} - -% A basic overview of \openshmem. Introduce symmetric memory, -% one-sided communications, overlap, and explain some of the terms -% we'll be seeing later (e.g.\ collective, atomic). - -In this chapter we will summarize the important concepts that underlie -\openshmem. - -\section{Remotely Accessible Symmetric Memory} - -Distributed programming models partition a running program into -``processes''. Processes have separate memory spaces that are private -to each process.\footnote{Contrast processes with threads that share a - single memory space} PGAS models extend the memory space to include -areas of memory that can be accessed remotely. In \openshmem these -are known as ``symmetric memory'' and are the areas that can be read -or written by get, put and other communication calls.
- -\openshmem provides \ref{itm:nkinds} kinds of remotely accessible memory: - -\begin{enumerate} - \item global variables pre-allocated at compile-time - \item \label{itm:nkinds} dynamic allocations at execution-time -\end{enumerate} - -Let us look at what each of these involves. - -\subsection{Global Symmetric Variables} - -Programming models make a distinction between ephemeral and persistent -data. - -Subroutine or function calls allocate private data and parameters on -the process' stack, and this exists only as long as the subroutine or -function exists. An interesting mistake is to return a pointer from a -C subroutine when that pointer has been allocated on the subroutine's -stack: upon return, the pointer will be invalid (although, depending -on what happens to that memory area, things may \emph{appear} to work -sometimes). - -Persistent data is stored in persistent areas of the memory of the -process. The way in which one specifies this is shown below for C and -Fortran: - -\vspace{0.2in} -\begin{table}[h] - \begin{center} - \caption{Classes of Global Symmetric Variables} - \begin{tabular}{|p{0.2\textwidth}|p{0.7\textwidth}|} - \hline - Language & Symmetric Variable \tabularnewline - \hline - \hline - C & Global variables outside of any subroutine \tabularnewline - & Variables declared \texttt{static} \tabularnewline - \hline - Fortran & \texttt{COMMON} blocks \tabularnewline - & Variables in main program \tabularnewline - & Variables declared \texttt{SAVE} \tabularnewline - \hline - \end{tabular} - \end{center} -\end{table} -\vspace{0.2in} - -Since the same process runs on each processing element (PE) in an -\openshmem program, global data is stored at the same location in each -process and thus is trivially symmetric\todo{At least, it is in the - ELF format, need to look up some more, e.g. XCOFF, Mach-O}. 
- -\subsection{Dynamically Allocated Symmetric Variables} - -The reader may be familiar with the C language's \texttt{malloc} -family of calls or Fortran's \texttt{allocatable}. Data allocated by -such calls persists beyond the lifetime of the scope in which the -allocation occurred: a corresponding \texttt{free} or -\texttt{deallocate} call is used to return the allocated memory to the -system for further use. Mismanagement of such dynamic allocation -frequently leads to memory leaks that cause crashes or slowdowns. - -\openshmem extends the notion of dynamically managed memory with API -calls that allocate within a special area of memory that can be seen -by other processes. The layout of this memory is identical across all -processes in an \openshmem program, which is why it is termed -``symmetric''. - -It is important to note that \openshmem does not guarantee that the -symmetric data will necessarily be at exactly the same memory location -in each process, but only that the layout will be identical and that -each PE can address remote locations by referring to its local -symmetric variables. Some implementations may be able to really use -the same memory locations but others may not, and will therefore need -to translate addresses used on one PE to those of another PE. One -impediment is the use of Address Space Randomization~\cite{ASLR}. - -% \begin{itemize} -% \item ``readiness' of symmetric memory. This conflates 2 steps, -% that the memory has been -% \begin{enumerate} -% \item allocated; and then -% \item completely assigned a value -% \end{enumerate} -% \end{itemize} - - -\section{Point-to-Point Communication} - -\openshmem supports both remote writes (put) and reads (get) of data -on a target PE. The active PE (the one that performs the put/get) is -usually not the same PE as the target, but there's no reason it can't -be. There is an important difference in behavior between put and get.
-A ``get'' can be thought of as a remote variable assignment: when the -subroutine returns, the local data has been updated and can be used. -However, in a ``put'', upon return we cannot assume that the data has -been written remotely: we need a subsequent synchronization call to -guarantee this. This is useful because splitting the send and -completion phases allows us to overlap local computation with remote -communication (and thus do both at once!). - -\openshmem allows us to transfer different sizes and arrangements of -data. These are the following kinds of transfer (both put and -get): - -\vspace{0.2in} -\begin{table}[h] - \begin{center} - \caption{Types of put/get Calls} - \begin{tabular}{|p{0.2\textwidth}|p{0.7\textwidth}|} - \hline - single-value & transfer from one source scalar variable to a target scalar \tabularnewline - \hline - contiguous & transfer from one source array to a target array \tabularnewline - \hline - strided & transfer from one array to another but with specified skip distances for both the source and target arrays \tabularnewline - \hline - \end{tabular} - \end{center} -\end{table} -\vspace{0.2in} - -Examples of these calls can be found in section~\ref{sec:p-to-p}. - -The calls all have variants that correspond to the type of the -variables. The precise names differ between C and Fortran to match -the type names present in those languages; in C, there are, amongst -others, put/get calls for int, long, and double; in Fortran, amongst -others, character, int4, real8. - -\section{Synchronization} - -\openshmem put calls proceed without knowing if the data has been -stored at the target. This means we need a mechanism to ensure that -pending stores have in fact occurred. \openshmem provides 2 calls for -this: - -\subsection{Fence} - -A \texttt{shmem\_fence} ensures that all puts to a particular PE will -complete before another put to that PE completes. Puts to other PEs -that are in progress are not affected by fence. 
- -Fence is a partial ordering. - -\subsection{Quiet} - -Quiet, on the other hand, imposes a complete ordering across all -pending put calls. - -\texttt{shmem\_quiet} ensures that all puts to \emph{all} PEs complete -before any new put after the quiet completes. - -\subsubsection{Demonstrating fence vs. quiet} - -Let's suppose we have an \openshmem program with 4 PEs. On PE 0, for -example, there are 3 outbound logical queues, one per other PE: - -\todo{Obviously need proper fancy figure here} -\vspace{0.2in} -\begin{verbatim} - 1 2 3 - --- --- --- - | | | | | | - | | | | | | - | | | X | | | - | X | | X | | | - | X | | X | | | - | X | | X | | | - | X | | X | | X | - --- --- --- -\end{verbatim} -\vspace{0.2in} - -After a fence, one or more \texttt{shmem\_put}s to a particular PE -will cause the corresponding outbound queue to drain before the puts -complete. Puts to other PEs are not affected. - -After a quiet, one or more \texttt{shmem\_put}s to \emph{any} PE will -cause \emph{all} outbound queues to drain before the puts complete. - -\subsubsection*{Implementation Note} - -Libraries may choose to implement \texttt{shmem\_fence} as -\texttt{shmem\_quiet} if this is more efficient for a particular -configuration. - -The reverse is not true, though: \texttt{shmem\_quiet} cannot be -implemented as \texttt{shmem\_fence} because quiet has stronger -semantics than fence. - -It may be that the interconnect hardware only manages a single, -tagged, out-bound queue for puts, and scanning it to identify traffic -to a particular PE is more expensive than simply blocking the queue as -a whole until it drains. - -Hardware that manages a number of out-bound queues can simply drain -the affected queue in a fence, leaving the other queue(s) free to -continue communication. - -\section{Collective Communication} - -In many programs, it is necessary for some data to be made available -in a number of places, or for distributed data to be collected and -processed in some way.
\openshmem provides routines that operate -``collectively'', that is, they involve a number of PEs acting in -concert, called an ``active set''. Currently \openshmem specifies -active sets through a ``triple'': - -\begin{enumerate} -\item start PE -\item stride (gap between PEs in the active set) -\item size (number of PEs in the active set) -\end{enumerate} - -Collective routines do not return until all PEs involved have finished -and data held locally has been stored. - -\subsection{Barriers} - -% barrier\_all and barrier(active set). - -A barrier call is a mechanism by which a number of PEs (an active set) -must wait for all those PEs to enter the barrier, before any can -leave. \openshmem provides a barrier call that operates on a named -active set and also a global barrier \texttt{shmem\_barrier\_all} that -applies unconditionally to all PEs in the program. The global barrier -ensures a flush (see quiet) of pending \openshmem communication before -continuing. - -A barrier, or other synchronization mechanism, is required before -accessing symmetric data that has been initialized. E.g.\ before -calling a collective that uses a synchronization array for the first -time, that array must have been initialized: all PEs calling the -collective must have initialized locally before entering the -collective, otherwise some PEs may enter and remotely access arrays -containing arbitrary values on other PEs. This can lead to undefined -behavior where a PE believes it can proceed before data is ready -elsewhere: a sum reduction may then generate incorrect results. - -\subsection{Broadcasts} - -A broadcast is a collective call that transfers data from one PE to -all the others in a given active set. The active set is specified as -usual, plus one of the PEs in that set is named as a ``root''. The -contiguous data from the source array on the root is then sent to the -target arrays on the other active set PEs (but \emph{not} on the root -itself).
- -\subsection{Collects} - -A ``collect'' performs concatenation of data from a source array into -a target array on an active set of PEs. The concatenation is -performed in PE-order, i.e.\ the data from the first PE in the active -set is written first into the target, then data from the 2nd PE, and -so on. Each PE may contribute different amounts of data to the -target. - -In the special case where it is known that all PEs in the active set -will write \emph{exactly} the same amount of data, a variant -``fcollect'' call (``f'' standing variously for ``fixed'' or ``fast'': -take your pick) exists. This variant allows the implementation to be -more efficient because all PEs in the active set know in advance where -to write their data. - -After the collect, all participating PEs have a copy of the -concatenated data. - -\subsection{Reductions} - -A reduction is an associative binary operation. A well-known example -of this is ``factorial'' which multiplies a set of values together -into a single result: - -\vspace{0.2in} - -\begin{math} -n! = \prod_{i=1}^n i -\end{math} - -\vspace{0.2in} - -\noindent -So for example - -\vspace{0.2in} - -\begin{math} -6! = 6 * 5 * 4 * 3 * 2 * 1 -\end{math} - -\vspace{0.2in} - -\noindent -Note that the order in which the multiplication occurs does not -matter. - -The reductions in \openshmem operate across arrays of values on PEs in an -active set. Each PE contributes one value for each reduction, and the -results are available on all PEs in the active set. - -\subsubsection{Minimum and Maximum} - -These 2 routines have versions for various different numeric types -(integer and floating-point) and find the smallest (min) and largest -(max) value in the source arrays. 
For example: - -\vspace{0.2in} - -\begin{math} -max(2, 4, 6, 12, 4, 2, -6, 7, 9, 12, 8, 3) = 12 -\end{math} - -\subsubsection{Arithmetic} - -These 2 routines have versions for various different numeric types -(integer and floating-point) and find the sum and product (see the -above factorial example) of the source arrays. For example: - -\vspace{0.2in} - -\begin{math} -sum(6, 7, 8, 9) = 30 -\end{math} - -\subsubsection{Logical} - -These 2 routines have versions for various different numeric types -(integer and complex) and perform the following Boolean logic -operations: - -\begin{itemize} -\item or -\item and -\item exclusive or (xor) -\end{itemize} - -The operations are logical rather than bitwise, so the source value 0 -corresponds to ``false'', anything else to ``true''. For example: - -\vspace{0.2in} - -\begin{minipage}{\linewidth} -\begin{math} -or(32, 16, 0, 8) = 1 -\end{math} - -\begin{math} -and(32, 16, 4) = 1 -\end{math} - -\begin{math} -and(32, 0, 4) = 0 -\end{math} -\end{minipage} - -\section{Atomic Operations} - -An ``atomic memory operation'' (AMO) is an assignment to a variable -that occurs without interruption: 2 atomic operations cannot update -the same variable at the same time: we may not know in which order the -updates happen, but we know that both will occur completely and -separately. - -The AMOs in \openshmem are remote, and address symmetric variables -only. - -\subsection*{Caveat} - -The guarantee of atomicity applies only to interaction with other -AMOs. An unguarded put may write to a variable that is engaged in an -AMO at any time, and an operation executing outside of \openshmem can -also update memory~\footnote{A multithreaded program may have one - thread running \openshmem and other threads doing other work}. This -restriction is important for performance reasons on network interfaces -that offload certain operations from the CPU onto the interface. 
- -\subsection{Swaps} - -These AMOs take a new value, write it to a target variable and return -the previous value of that target variable. One call does this -unconditionally, another takes an extra ``compare'' value and the swap -only occurs if the provided value is equal to the target variable's -value. - -\todo{Swaps: real usage of these?} - -\subsection{Add and Increment} - -These AMOs add a constant to the value of a variable on a target PE. -The constant can be provided by the programmer in the add operation, -or is 1 in increment.~\footnote{Increment appears to be just a special - case of ``add'' but certain hardware may provide dedicated - instructions for increment that allow implementers to optimize.} - -\subsection{Fetch, with Add and Increment} - -These AMOs are the same as ``add and increment'' described above, but -they also return the previous value of the target variable to the -caller. - -\subsection{Distributed Locks} - -Distributed locks provide a mechanism to implement mutual exclusion or -critical sections. A lock applies to a symmetric variable: a PE that -enters a lock call guarded by a variable will be the only PE active -amongst other PEs also trying to acquire the lock. Thus a lock will -force the participating PEs to serialize their execution of the locked -section. - -An application involving, for example, a distributed hash table -\todo{DHT: ref to Maynard/Nakao submission} would update its contents -consistently by locking access to an index, so that only one PE can -manipulate the index at a time. - -Locks can seriously impact the performance of applications as PEs -often sit idle while waiting to acquire a lock. If locks can be -avoided, they should be. If not, the work done within a lock should -be as small and as quick as possible to minimize the overhead of the -lock. 
diff --git a/_deprecated_sources/openshmem-book/openshmem-in-practice.tex b/_deprecated_sources/openshmem-book/openshmem-in-practice.tex deleted file mode 100644 index 562fc7205..000000000 --- a/_deprecated_sources/openshmem-book/openshmem-in-practice.tex +++ /dev/null @@ -1,64 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-% - - -\chapter{Using \openshmem in the Real World} - -\begin{itemize} - -\item Some real-life examples of interesting applications written in - \openshmem. GUPS, UTS, \ldots? - -\item We can take some programs that demonstrate various things that - tend to be of interest to scientific coders, e.g.\ matrix - manipulation, FFT and use of SHMEM as a harness around BLAS/LAPACK - programs. - -\item How to add SHMEM to existing programs to parallelize them. - -\item How to combine SHMEM with other programming models in hybrid programs. - -\item Will want some performance numbers/graphs here to make it - convincing. - -\item Ram has some benchmarky type programs that can go here. - -\item Can also talk about hybrid models, e.g.\ \openshmem for - communication harness, OpenMP intra-node, or \openshmem in purely - shared memory. - -\item What platforms would I want to use \openshmem on? - -\end{itemize} diff --git a/_deprecated_sources/openshmem-book/other-openshmems.tex b/_deprecated_sources/openshmem-book/other-openshmems.tex deleted file mode 100644 index 6efc09c2e..000000000 --- a/_deprecated_sources/openshmem-book/other-openshmems.tex +++ /dev/null @@ -1,45 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. 
-% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - - -\chapter{Other Open-Source \openshmem Implementations} - -\section{Open-MPI} - -Currently in the development repository - -\section{ANL MPI-3} - -https://github.com/jeffhammond/oshmpi diff --git a/_deprecated_sources/openshmem-book/performance.tex b/_deprecated_sources/openshmem-book/performance.tex deleted file mode 100644 index 960ff636c..000000000 --- a/_deprecated_sources/openshmem-book/performance.tex +++ /dev/null @@ -1,58 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. 
-% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - - -\chapter{Programming with \openshmem for High Performance} - -\section{Applications} - -We talk about \openshmem on high-end systems, so we need to give some -examples and indicate what the important features of each is. - -Describe best techniques for dragging good performance out of code. -E.g.\ tips on maximizing overlap, minimizing data motion, -understanding system layout. - -\section{Library Implementations} - -Also look at how to get good performance out of the library itself -on different platforms, not just applications written in \openshmem. 
- -Could also talk about how hardware platforms influence design -decisions in \openshmem implementations and what you could do to -leverage things like communication offload. We have examples of -Infiniband, Portals and intra-node. - -And, of course, we should also talk about UCCS. diff --git a/_deprecated_sources/openshmem-book/programming-issues.tex b/_deprecated_sources/openshmem-book/programming-issues.tex deleted file mode 100644 index 6989e5858..000000000 --- a/_deprecated_sources/openshmem-book/programming-issues.tex +++ /dev/null @@ -1,189 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - - -\chapter{\openshmem Programming Pitfalls} - -Or better yet, how to avoid them and what to do, with handy-dandy -examples of code improvements. - -\section{When is a Put actually put?} - -\subsection{Assuming puts are sent when they're not} - -% When things are on-the-wire but haven't been received yet. Contrast -% with ``get''. How do I ensure this? Example needed. - -The \openshmem model is that of \emph{deferred} synchronization. We -will see below how this helps parallel programming. What this means -is that a ``put'' call returns and computation continues locally even -though the data involved in the ``put'' may not have been delivered -yet, which is described as \emph{on the wire}. - -The following code snippet shows this situation: - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=put-ordering,language=OSH+C}{programs/put-ordering.c} -\vspace{0.1in} -\end{minipage} - -When the 2 ``put'' calls have returned and local computation -continues, all we know is that the - -\begin{minipage}{\linewidth} -\begin{itemize} -\item source data from arrays \texttt{s1} and \texttt{s2} are en route -\item the contents of \texttt{s1} and \texttt{s2} may be changed - safely by the sender without affecting what is sent. -\end{itemize} -\end{minipage} - -It \emph{may} have been fully delivered to the destination PE, or it -may not. 
It is not until a synchronization point (here a global -barrier) that we can guarantee the data from the source arrays has -been written to the target arrays on the destination PEs. - -\subsection{Assuming order-of-delivery} - -% Some programs that try to rely on o-o-d and how this can trip you up. - -\openshmem ``put'' calls do not complete when they return: the store -at the target is only guaranteed after a synchronization point such as -``fence''. Further, \openshmem knows nothing about the underlying -transport that actually moves data from a particular PE to a target. -The transport could be shared-memory, or a network like Infiniband or -Ethernet. Some transports manipulate the data being sent on them. -Examples of such manipulation are - -\begin{description} -\item[re-ordering:] the transport may choose to send data in ``put'' - calls in a different order to the order that appears to someone - reading the source code. -\item[asymmetric transports:] an underlying network may send data on - different paths to a target depending on circumstances, e.g.\ to - avoid congestion or to aid fan-out across machines in a cluster. -\item[coalescing:] multiple ``puts'' to a given target may be held - back and merged into a single transfer to reduce the amount of - traffic and avoid the latency incurred by separate ``puts''. -\end{description} - -Referring to listing~\ref{put-ordering}, depending on the environment -where this program runs, an underlying transport may send the 2 -``puts'' in any order: the smaller 8 int put may occur first if the -current route to PE 2 is ``quieter'', even though there is -\emph{textually} another ``put'' before it. - -\section{Communication vs.\ Computation} - -\subsection{Failing to spot overlap opportunities} - -How to reorder applications to make the communication as -early-as-possible and synchronization as late-as-possible, and -therefore maximize the amount of work that can be done in the -meantime.
- -\section{Synchronization} - -\subsection{Not synchronizing in the right places} - -E.g.\ where each PE initializes some data independently then tries -to use that data in a collective. How to think about ``readiness'' -in a symmetric way. - -Memory that is to be accessed remotely (e.g.\ it will be the target of -a ``put'' from another PE) must be ready on all PEs before use. By -\emph{ready} we mean it must be both - -\begin{itemize} -\item allocated, and -\item initialized (if needed) -\end{itemize} - -before any communication call uses the memory. A common mistake is to -allocate memory, initialize it to some values, and then immediately -proceed to a sequence of ``puts'' or a collective routine: although -the allocation and initialization have completed locally, we do not know -if this sequence has finished on other PEs yet. - -This leads to attempts to use unallocated memory (often this will -cause a segmentation violation); or allocated memory will not have been -initialized and incorrect values will be transmitted or used, e.g.\ in a -collective call like a reduction. - -A synchronization point is required before such symmetrically -allocated memory is used. Often a global \texttt{shmem\_barrier\_all} -will be used as calls like \texttt{shmalloc} are globally collective. - -\begin{minipage}{\linewidth} -\vspace{0.1in} -\numberedlisting{label=synch-not-needed,language=OSH+C}{programs/synch-not-needed.c} -\vspace{0.1in} -\end{minipage} - -The barrier on line 7 is required to make sure that \texttt{pSync} has -been fully initialized across all participating PEs before any PE -enters the reduction. Without synchronization it is possible for PEs -to ``fall through'' if \texttt{pSync} accidentally has a value that -indicates the PE can start to walk through the barrier.
- -\subsection{Over-synchronizing in the wrong places} - -% Sometimes you're not sure whether you need a synchronization at some -% point and may put one in where it is not needed, or you're not -% exploiting the overlap paradigm and are instead trying to -% coerce \openshmem into a matched send-receive mode. - -It is also possible to introduce unnecessary synchronizations. These -do not affect the \emph{correctness} of the program, but can introduce -a slowdown. Collective reduction routines, for example, ensure that -the reduced data has been stored on all participating PEs upon return. -Therefore there is no need to introduce a further synchronization -before examining the results. - -The global barrier on line 11 of listing~\ref{synch-not-needed} is not -needed, as the \\ \texttt{shmem\_long\_sum\_to\_all} has already made -sure that remote stores of \texttt{target} have been completed and -\texttt{target} is safe to use on all PEs that participated in the -reduction.~\footnote{the content of \texttt{target} is unchanged on PEs - outside of the active set.} - -\subsection{Summary: too little or too much synchronization} - -Too little synchronization will lead to incorrect results or other -aberrant behavior such as hangs or program crashes. - -Too much synchronization will not introduce any change of behavior, -but may introduce slowdown due to the overhead of extra calls. 
diff --git a/_deprecated_sources/openshmem-book/programs/add.c b/_deprecated_sources/openshmem-book/programs/add.c deleted file mode 100644 index 5a20d7f1c..000000000 --- a/_deprecated_sources/openshmem-book/programs/add.c +++ /dev/null @@ -1,28 +0,0 @@ -#include - -#include - -int counter = 0; - -int -main (int argc, char *argv[]) -{ - int me; - - start_pes (0); - me = _my_pe (); - - if (me > 0) - { - shmem_int_add (&counter, me, 0); - } - - shmem_barrier_all (); - - if (me == 0) - { - printf ("counter = %d\n", counter); - } - - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/add.output b/_deprecated_sources/openshmem-book/programs/add.output deleted file mode 100644 index db49bfa8f..000000000 --- a/_deprecated_sources/openshmem-book/programs/add.output +++ /dev/null @@ -1 +0,0 @@ -counter = 6 diff --git a/_deprecated_sources/openshmem-book/programs/amo.c b/_deprecated_sources/openshmem-book/programs/amo.c deleted file mode 100644 index d942b6410..000000000 --- a/_deprecated_sources/openshmem-book/programs/amo.c +++ /dev/null @@ -1,127 +0,0 @@ -#include -#include -#include - -// const int tries = 1000000; -int tries = 880; - -#ifdef TEST64BIT -typedef long locktype; -#else -typedef int locktype; -#endif - - -int -main(int argc, char *argv[]) -{ - int tpe, other; - long i; - struct { - locktype a; - locktype b; - } *twovars; - long numfail = 0; - long numfailsg = 0; - long numfailpg = 0; - - if (argc > 1) { - tries = atoi(argv[1]); - } - - start_pes(0); - tpe = 0; - other = _num_pes() - 1; - - twovars = shmalloc(sizeof(*twovars)); - if (_my_pe() == 0) { - printf("Element size: %ld bytes\n", sizeof(locktype)); - printf("Addresses: 1st element %p\n", &twovars->a); - printf(" 2nd element %p\n", &twovars->b); - printf("Iterations: %d target PE: %d other active PE: %d\n", - tries, tpe, other); - } - twovars->a = 0; - twovars->b = 0; - - shmem_barrier_all(); - - - if (_my_pe() == tpe) { - // put two values alternately to the 1st 32 bit word - long 
expect, check; - - for (i=0; ia = expect; - if (sizeof(locktype) == sizeof(int)) { - check = shmem_int_g((void*)&twovars->a, tpe); - } else if (sizeof(locktype) == sizeof(long)) { - check = shmem_long_g((void*)&twovars->a, tpe); - } - if (check != expect) { - if (numfailsg < 2) { - printf("error: iter %ld get-after-store returned %ld expected %ld\n", i, check, expect); - } - numfailsg++; - numfail++; - } - } - printf("PE %d done doing stores and gets\n",_my_pe()); - - for (i=0; ia, expect, tpe); - check = shmem_int_g((void*)&twovars->a, tpe); - } else if (sizeof(locktype) == sizeof(long)) { - shmem_long_p((void*)&twovars->a, expect, tpe); - check = shmem_long_g((void*)&twovars->a, tpe); - } - if (check != expect) { - if (numfailpg < 2) { - printf("error: iter %ld get-after-put returned %ld expected %ld\n", i, check, expect); - } - numfailpg++; - numfail++; - } - } - printf("PE %d done doing puts and gets\n",_my_pe()); - - - } else if (_my_pe() == other) { - // keep on atomically incrementing the 2nd 32 bit word - long oldval; - - for (i=0; ib, tpe); - } else if (sizeof(locktype) == sizeof(long)) { - oldval = shmem_long_finc((void*)&twovars->b, tpe); - } - if (oldval != i) { - printf("error: iter %ld finc got %ld expect %ld\n", i, oldval, i); - numfail++; - if (numfail > 10) { - printf("FAIL\n"); - abort(); - } - } - } - printf("PE %d done doing fincs\n",_my_pe()); - } - shmem_barrier_all(); - if (numfail) { - if (_my_pe() == tpe) { - printf("FAIL: %ld errors: %ld get-after-store %ld get-after-put\n", - numfail, numfailsg, numfailpg); - } else { - printf("FAIL: %ld errors\n", numfail); - } - } - shmem_barrier_all(); - if (_my_pe() == 0) { - printf("test complete\n"); - } - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/broadcast.c b/_deprecated_sources/openshmem-book/programs/broadcast.c deleted file mode 100644 index c92a3ed0c..000000000 --- a/_deprecated_sources/openshmem-book/programs/broadcast.c +++ /dev/null @@ -1,46 +0,0 @@ -#include 
-#include - -#include - -long pSync[_SHMEM_BCAST_SYNC_SIZE]; - -int -main (void) -{ - long *target, *source; - int i, me, npes; - - start_pes (0); - me = _my_pe (); - npes = _num_pes (); - - source = (long *) shmalloc (npes * sizeof (*source)); - for (i = 0; i < npes; i += 1) - { - source[i] = i + 1; - } - target = (long *) shmalloc (npes * sizeof (*target)); - for (i = 0; i < npes; i += 1) - { - target[i] = -999; - } - for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1) - { - pSync[i] = _SHMEM_SYNC_VALUE; - } - shmem_barrier_all (); - - shmem_broadcast64 (target, source, npes, 0, 0, 0, npes, pSync); - - for (i = 0; i < npes; i++) - { - printf ("%-8d %ld\n", me, target[i]); - } - - shmem_barrier_all (); - shfree (target); - shfree (source); - - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/broadcast.output b/_deprecated_sources/openshmem-book/programs/broadcast.output deleted file mode 100644 index f5415be98..000000000 --- a/_deprecated_sources/openshmem-book/programs/broadcast.output +++ /dev/null @@ -1,9 +0,0 @@ -0 -999 -0 -999 -0 -999 -1 1 -1 2 -1 3 -2 1 -2 2 -2 3 diff --git a/_deprecated_sources/openshmem-book/programs/fadd.c b/_deprecated_sources/openshmem-book/programs/fadd.c deleted file mode 100644 index 6e01430d2..000000000 --- a/_deprecated_sources/openshmem-book/programs/fadd.c +++ /dev/null @@ -1,30 +0,0 @@ -#include - -#include - -int counter = 9; /* just some starting value */ - -int -main (int argc, char *argv[]) -{ - int me; - int pe_counter; - - start_pes (0); - me = _my_pe (); - - if (me == 1) - { - pe_counter = shmem_int_fadd (&counter, me, 0); - printf ("PE %d: PE 0 returned %d\n", me, pe_counter); - } - - shmem_barrier_all (); - - if (me == 0) - { - printf ("PE %d: counter = %d\n", me, counter); - } - - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/fadd.output b/_deprecated_sources/openshmem-book/programs/fadd.output deleted file mode 100644 index 8c216540c..000000000 --- 
a/_deprecated_sources/openshmem-book/programs/fadd.output +++ /dev/null @@ -1,2 +0,0 @@ -PE 0: counter = 10 -PE 1: PE 0 returned 9 diff --git a/_deprecated_sources/openshmem-book/programs/finc.c b/_deprecated_sources/openshmem-book/programs/finc.c deleted file mode 100644 index 7823f309e..000000000 --- a/_deprecated_sources/openshmem-book/programs/finc.c +++ /dev/null @@ -1,30 +0,0 @@ -#include - -#include - -int counter = 99; /* just some starting value */ - -int -main (int argc, char *argv[]) -{ - int me; - int pe_counter; - - start_pes (0); - me = _my_pe (); - - if (me == 1) - { - pe_counter = shmem_int_finc (&counter, 0); - printf ("PE %d: PE 0 returned %d\n", me, pe_counter); - } - - shmem_barrier_all (); - - if (me == 0) - { - printf ("PE %d: counter = %d\n", me, counter); - } - - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/finc.output b/_deprecated_sources/openshmem-book/programs/finc.output deleted file mode 100644 index 59aeb21b1..000000000 --- a/_deprecated_sources/openshmem-book/programs/finc.output +++ /dev/null @@ -1,2 +0,0 @@ -PE 0: counter = 100 -PE 1: PE 0 returned 99 diff --git a/_deprecated_sources/openshmem-book/programs/hello-mpi.c b/_deprecated_sources/openshmem-book/programs/hello-mpi.c deleted file mode 100644 index 34e57c007..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-mpi.c +++ /dev/null @@ -1,15 +0,0 @@ -#include -#include - -int -main (int argc, char *argv[]) -{ - int nprocs, me; - - MPI_Init (&argc, &argv); - MPI_Comm_size (MPI_COMM_WORLD, &nprocs); - MPI_Comm_rank (MPI_COMM_WORLD, &me); - printf ("Hello from %d of %d\n", me, nprocs); - MPI_Finalize (); - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/hello-mpi.output b/_deprecated_sources/openshmem-book/programs/hello-mpi.output deleted file mode 100644 index ccd55a0d2..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-mpi.output +++ /dev/null @@ -1,4 +0,0 @@ -Hello from 1 of 4 -Hello from 2 of 4 
-Hello from 3 of 4 -Hello from 0 of 4 diff --git a/_deprecated_sources/openshmem-book/programs/hello-openshmem-c.output b/_deprecated_sources/openshmem-book/programs/hello-openshmem-c.output deleted file mode 100644 index 5509dc3b2..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-openshmem-c.output +++ /dev/null @@ -1,4 +0,0 @@ -Hello from 0 of 4 -Hello from 2 of 4 -Hello from 3 of 4 -Hello from 1 of 4 diff --git a/_deprecated_sources/openshmem-book/programs/hello-openshmem-f90.output b/_deprecated_sources/openshmem-book/programs/hello-openshmem-f90.output deleted file mode 100644 index e2f8b4075..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-openshmem-f90.output +++ /dev/null @@ -1,4 +0,0 @@ -Hello from 0 of 4 -Hello from 2 of 4 -Hello from 3 of 4 -Hello from 1 of 4 diff --git a/_deprecated_sources/openshmem-book/programs/hello-openshmem.c b/_deprecated_sources/openshmem-book/programs/hello-openshmem.c deleted file mode 100644 index 8aa279b41..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-openshmem.c +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include /* The shmem header file */ - -int -main (int argc, char *argv[]) -{ - int nprocs, me; - - start_pes (0); - nprocs = shmem_n_pes (); - me = shmem_my_pe (); - printf ("Hello from %d of %d\n", me, nprocs); - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/hello-openshmem.f90 b/_deprecated_sources/openshmem-book/programs/hello-openshmem.f90 deleted file mode 100644 index d4af67c35..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-openshmem.f90 +++ /dev/null @@ -1,16 +0,0 @@ -program hello - - include 'shmem.fh' - integer :: shmem_my_pe, shmem_n_pes - - integer :: npes, me - - call start_pes (0) - npes = shmem_n_pes () - me = shmem_my_pe () - - write (*, 1000) me, npes - - 1000 format ('Hello from', 1X, I4, 1X, 'of', 1X, I4) - -end program hello diff --git a/_deprecated_sources/openshmem-book/programs/hello-seq.c 
b/_deprecated_sources/openshmem-book/programs/hello-seq.c deleted file mode 100644 index b7b70781d..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-seq.c +++ /dev/null @@ -1,8 +0,0 @@ -#include - -int -main (int argc, char *argv[]) -{ - printf ("Hello\n"); - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/hello-seq.output b/_deprecated_sources/openshmem-book/programs/hello-seq.output deleted file mode 100644 index e965047ad..000000000 --- a/_deprecated_sources/openshmem-book/programs/hello-seq.output +++ /dev/null @@ -1 +0,0 @@ -Hello diff --git a/_deprecated_sources/openshmem-book/programs/inc.c b/_deprecated_sources/openshmem-book/programs/inc.c deleted file mode 100644 index a0fbaf24b..000000000 --- a/_deprecated_sources/openshmem-book/programs/inc.c +++ /dev/null @@ -1,28 +0,0 @@ -#include - -#include - -int counter = 0; - -int -main (int argc, char *argv[]) -{ - int me; - - start_pes (0); - me = _my_pe (); - - if (me > 0) - { - shmem_int_inc (&counter, 0); - } - - shmem_barrier_all (); - - if (me == 0) - { - printf ("counter = %d\n", counter); - } - - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/inc.output b/_deprecated_sources/openshmem-book/programs/inc.output deleted file mode 100644 index 7773f646b..000000000 --- a/_deprecated_sources/openshmem-book/programs/inc.output +++ /dev/null @@ -1 +0,0 @@ -counter = 3 diff --git a/_deprecated_sources/openshmem-book/programs/ptp.c b/_deprecated_sources/openshmem-book/programs/ptp.c deleted file mode 100644 index 518f83494..000000000 --- a/_deprecated_sources/openshmem-book/programs/ptp.c +++ /dev/null @@ -1,104 +0,0 @@ -/* Passive Target Progress test */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef __GNUC__ -int pe_escape = 1; -#define mb() __sync_synchronize() -#else -/* this isn't entirely right, may need to fill in your compiler or - platform's memory barrier */ -volatile int pe_escape = 1; -#endif - 
-int A = 0; - -int -main(int argc, char **argv) -{ - int me, npes; - struct timeval now; - long t_start, t_end; - - start_pes(0); - me = _my_pe(); - npes = _num_pes(); - - if (npes < 4) { - if (me==0) - fprintf(stderr,"ERR: test requires 4 or more PEs\n"); - return 1; - } - shmem_barrier_all(); - - gettimeofday(&now, NULL); - t_start = (now.tv_sec * 1000000.0) + now.tv_usec; - - switch (me) { - case 0: - while (pe_escape) { - double pi, pi2, pi3; - int j; - - for (j=1; j <= 5000; j++) { - pi = (22.0 / 7.0) + (double) j; - pi2 = pi * (double) j; - pi3 = (pi2 * pi) / 1.2; - } - mb(); - } - gettimeofday(&now, NULL); - t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; - break; - - case 1: - shmem_int_inc(&A, 0); - gettimeofday(&now, NULL); - t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; - break; - - case 2: - while (1 != shmem_int_g(&A, 0)) { ; } - shmem_int_inc(&A, 0); - gettimeofday(&now, NULL); - t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; - break; - - case 3: - while (2 != shmem_int_g(&A, 0)) { ; } - shmem_int_p((int*) &pe_escape, 0, 0); // release PE0. - if (npes > 4) { - int i; - - for(i=4; i < npes; i++) - shmem_int_p((int*)&pe_escape, 0, i); // release PE0. 
- } - gettimeofday(&now, NULL); - t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; - break; - - default: - /* spin until released, A will never == 99, generate PE-0 traffic */ - while (99 != shmem_int_g(&A, 0) && pe_escape) { - mb(); - } - gettimeofday(&now, NULL); - t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; - break; - } - - if (me < 4) - fprintf(stderr,"[%d] elapsed usecs %ld A %d\n",me,t_end,A); - - shmem_barrier_all(); - - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/put-ordering.c b/_deprecated_sources/openshmem-book/programs/put-ordering.c deleted file mode 100644 index b9fa07569..000000000 --- a/_deprecated_sources/openshmem-book/programs/put-ordering.c +++ /dev/null @@ -1,6 +0,0 @@ -if ( _my_pe () == 0 ) - { - shmem_int_put (&d1, &s1, 64, 1); /* 64 ints to PE 1 */ - shmem_int_put (&d2, &s2, 8, 2); /* different 8 ints to PE 2 */ - } -shmem_barrier_all (); diff --git a/_deprecated_sources/openshmem-book/programs/rotate-put.c b/_deprecated_sources/openshmem-book/programs/rotate-put.c deleted file mode 100644 index 201ec1143..000000000 --- a/_deprecated_sources/openshmem-book/programs/rotate-put.c +++ /dev/null @@ -1,29 +0,0 @@ -#include - -#include - -int d; /* symmetric target variable */ - -int -main (int argc, char *argv[]) -{ - int me, npes; - int rn; - - start_pes (0); /* Initialize this PE */ - - me = _my_pe (); /* I am this PE */ - npes = _num_pes (); /* There are this many PEs in the program */ - - rn = (me + 1) % npes; /* find my right neighbor (wrap-around) */ - - d = me; /* value to send */ - - shmem_int_p (&d, d, rn); /* send to right neighbor */ - - shmem_barrier_all (); /* everyone must have sent */ - - printf ("%d/%d: d = %d\n", me, npes, d); - - return 0; -} diff --git a/_deprecated_sources/openshmem-book/programs/rotate-put.output b/_deprecated_sources/openshmem-book/programs/rotate-put.output deleted file mode 100644 index 14df10f02..000000000 --- 
a/_deprecated_sources/openshmem-book/programs/rotate-put.output +++ /dev/null @@ -1,4 +0,0 @@ -1/4: d = 0 -3/4: d = 2 -0/4: d = 3 -2/4: d = 1 diff --git a/_deprecated_sources/openshmem-book/programs/synch-not-needed.c b/_deprecated_sources/openshmem-book/programs/synch-not-needed.c deleted file mode 100644 index 30fc8c773..000000000 --- a/_deprecated_sources/openshmem-book/programs/synch-not-needed.c +++ /dev/null @@ -1,14 +0,0 @@ -... - -for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1) - { - pSync[i] = _SHMEM_SYNC_VALUE; - } -shmem_barrier_all (); - -shmem_long_sum_to_all (target, source, npes, 0, 0, 0, npes, pSync); - -... -... - -shmem_barrier_all (); diff --git a/_deprecated_sources/openshmem-book/reference-implementation.tex b/_deprecated_sources/openshmem-book/reference-implementation.tex deleted file mode 100644 index e702206b2..000000000 --- a/_deprecated_sources/openshmem-book/reference-implementation.tex +++ /dev/null @@ -1,615 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. 
-% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - - -\chapter{Implementation Of The Reference \openshmem Library} - -Along with the new specification for \openshmem, there is a reference -library that tracks the development of the specification, and acts as -a testbed for the evolution of the specification. This chapter -discusses the implementation of the reference library. - -\section{Implementation Strategy} - -One important goal of the reference implementation is not to be tied -to a particular environment, so it can be used on different platforms -and gain wider exposure for \openshmem. To this end we have used -GASNet~\cite{gasnet} initially to abstract away from a particular -interconnect and platform, although there is nothing to stop a more -direct approach being used to target specific hardware, as for example -in MVAPICH2-X~\cite{mvapich2-x}. Internally, the reference -implementation of \openshmem provides private APIs for memory -management, the communications layer, tracing and debugging, and -support for adaptivity to choose things like a barrier algorithm at -run-time. 
- -\subsection{GASNet} - -GASNet~\cite{gasnet} (\textbf{G}lobal-\textbf{A}ddress \textbf{S}pace -\textbf{Net}working) is a low-level library that has been used to -implement the runtime of a number of popular HPC PGAS languages, such -as Unified Parallel C~\cite{upc}, Chapel~\cite{chapel} and Co-Array -Fortran~\cite{coarray}. - -GASNet manages the start-up of a program and coordinates between the -``nodes'' that make up the program. It also makes available to all -nodes information about all the other nodes, such as where memory -segments have been allocated. The details of this are discussed -below. - -GASNet provides an abstraction away from the underlying hardware on a -given platform. Hardware-specific interfaces are implemented as -modules, called ``conduits'': there are conduits for all the prevalent -communication systems in HPC systems, including networks such as -Ethernet, Infiniband, Portals and Myrinet, and one specifically for -shared-memory multiprocessor machines (SMP). Support for conduits is -detected when GASNet is configured and can further be enabled or -disabled according to requirements. - -Three different communication scenarios are supported via ``segments'': - -\begin{table}[h] - \begin{center} - \caption{GASNet Segment Configurations} - \begin{tabular}{|p{0.2\textwidth}|p{0.7\textwidth}|} - \hline - Segment Name & Intended for \ldots \tabularnewline - \hline - \hline - fast & a ``reasonable'' size symmetric segment and - implementation-specific tuning for fastest data access \tabularnewline - \hline - large & a larger symmetric segment and this may impose some data access penalty \tabularnewline - \hline - everything & the entire process' memory space is available, not just a managed data segment \tabularnewline - \hline - \end{tabular} - \end{center} -\end{table} - -Segments are how GASNet manages the per-node memory areas that are -remotely accessible to other nodes.
Not all conduits are supported -with all 3 segment configurations, which has implications for how -\openshmem's ``symmetric'' variables are handled, and this is discussed -below. - -\subsection{How \openshmem uses GASNet} - -GASNet provides access to the symmetric memory areas. A memory -management library marshals accesses to these areas during allocation -and freeing of symmetric variables in user code, usually through a -call like \texttt{shmalloc()} or \texttt{shfree()}. - -When the program does \texttt{gasnet\_attach()} and asks for segment -information, each PE has access to an array of segments, 1 segment -per PE. Each PE initializes a memory pool within its own segment. -The set up is handled either by GASNet internally (``fast''/``large'' -model) or by \openshmem itself (``everything'' model). The table of -segments allows any PE to know the virtual location and size of the -segment belonging to any other PE. - -If the platform allows it, GASNet can align all the segments at the -same address, which means that all PEs see the same address for -symmetric variables and there's no address translation. - -In the general case though, segments are not aligned (e.g.\ due to a -security measure like process address space randomization by the -OS). However, each PE can see the addresses of the segments of the -other PEs locally, and can therefore do address translation. - -Currently alignment is not checked for, so we're coding to the -``worst case scenario''. That just adds a \emph{small} overhead if -the segments are in fact aligned. The library should at some point -introduce code that differentiates between aligned and non-aligned -environments with optimized code for the former case (GASNet provides -a macro you can test against). - -\subsubsection{Segment Models} - -The library currently has best support for the ``everything'' -model. This model allows the entire process space to be addressed -remotely.
Communication with dynamically allocated data and with -global data is equally easy. - -For the ``fast'' and ``large'' models, only the area of the process -memory managed by GASNet is exposed for remote access. This means -extra support has to be added to handle communication with global -variables, because only the symmetric heap is visible. -This is done via Active Messages. - -For the SMP conduit, PSHM support is required to run parallel threaded -programs with \openshmem. This excludes the ``everything'' model (at -least for the architectures to hand). - -\subsection{\openshmem Initialization \& Finalization} - -The \texttt{start\_pes} call handles setting up the \openshmem runtime, -and eventual shutdown. Shutdown is implicit in user code: there is no -call to do this in SGI SHMEM, so we register an exit handler to be -called when \texttt{main()} exits. (Cray SHMEM has an explicit -finalize call, however, and a proposal for a profiling interface has -suggested introducing this to \openshmem.) The segment exchange is a -target for future optimization: in large programs, the start-up time -will become burdensome due to the large number of address/size -communications. Strategies for avoiding this include lazy -initialization and hierarchical or directory-based lookups. - -\subsection{Communications Substrate} - -The \openshmem library has been written to sit on top of any -communications library that can provide the required -functionality. Initially we have targeted GASNet. - -\subsection{Servicing Communications} - -GASNet provides this functionality in some cases. The mainline code -needs to spin on variable waits (e.g.\ shmem\_long\_wait\_until) to poll -GASNet, otherwise progress is automatic via a servicer unit. This is -implemented with a progress thread that polls in a continuous loop.
- -\subsection{Memory Management} - -Initially we tried to use the TLSF~\cite{tlsf} library (as used in the -SiCortex SHMEM implementation) but this proved to have weird -interactions with Open-MPI. Tracking program progress with valgrind -\cite{valgrind} suggested that system memory calls were being -intercepted. - -So, following the Chapel~\cite{chapel} lead, we now use the -``dlmalloc''~\cite{dlmalloc} library to manage allocations in the -symmetric memory space. - -\subsection{Point-to-point routines} - -Point-to-point operations are a thin layer on top of GASNet. The -non-blocking put operations with implicit handles provide a way to -subsequently fence and barrier. However, tracking individual handles -explicitly with a hash table keyed on the address of symmetric -variables may give better performance, and this needs to be looked -into. - -The Quadrics extensions that add non-blocking calls into the API -proper have already been requested for the \openshmem development. An -initial attempt at these are already in the library and they pass the -Cray verification tests. - -\subsection{Atomic Operations} - -Atomic operations include swaps, fetch-and-add and locks (discussed -separately in \ref{sub:Locks}). The first two are handled via GASNet's -Active Messages. Increment was originally layered on top of add -(increment is simply a special case of add) but was rewritten with its -own handlers. The payload for increment can be ever so slightly -smaller than for add since there's no need to pass the value to -add. At large scale, even such a small saving could pay off. - -Earlier versions of the implementation had a single handler lock -variable per operation (one for all adds, one for all increments, -\emph{etc.}). However, there is now a hash table to allocate and -manage per-target-address handler locks. 
Large-scale atomic -operations, like add-scatters across multiple variables could easily -benefit from this, as the lock granularity then permits concurrent -discrete memory accesses. - -\subsection{\label{sub:Locks}Locks} - -\openshmem provides routines to claim, release and test global -locks~\footnote{Not to be confused with GASNet's Active Message - Handler Locks.}. These can be used for mutual-exclusion -regions. Our implementation is from the Quadrics library, which is a -version of the Mellor-Crummy-Scott -algorithm~\cite{Mellor-Crummey:1991:ASS:103727.103729}. The locks are -layered on top of \openshmem primitives, so there are no Elan -dependencies. - -\subsection{Barrier and broadcast} - -The initial version is naive, making the root of the broadcast a -bottleneck. This is partly intentional, to allow scope to explore -better algorithms and work out how to demonstrate and document the -improvements. We would like to collect some locality information -inside the library to help decide communication order inside these -algorithms: PEs that differ in rank by large amounts are likely to be -further away topologically too, so by sending to more distant PEs -first, we can stagger the network traffic and balance the latencies -better. A proper measurement of ``distance'' is needed here. The -``hwloc'' package~\cite{hwloc} provides a per-system distance metric -in NUMA terms. An extension could e.g.\ just multiply the distance by -some constant when moving off-node to penalize network traffic. - -\subsection{Collects} - -The collector routines concatenate (parts of) source arrays on an -active set of PEs into a target array on all of those PEs. - -\begin{description} -\item[collect] is the general routine in which each participating PE - can write different amounts of data. -\item[fcollect] is an optimization in which all participating PEs - \emph{must} contribute the same amount of data. This means we can - just pre-compute where each PE writes to their targets. 
-\end{description} - -Two approaches were considered for ``collect'': - -\begin{enumerate} -\item initial exchange of sizes ``from the left'' so each PE can - compute its write locations; then same as \texttt{fcollect} -\item wavefront: PEs wait for notification from PEs before them in the - set (lower numbered). This passes the offsets across the set. -\end{enumerate} - -The library uses the ``wavefront''. The ``exchange'' method -potentially generates a network storm as all PEs wait to work out -where to write, then all write at once, whereas the ``wavefront'' -staggers the offset notification with a wave of writes moving up the -PE numbers. - -\subsection{Reductions} - -Reductions coalesce data from a number of PEs into either a single -variable or array on all participating PEs. The coalescing involves -some kind of arithmetic or logic operation (e.g.\ sum, product, -exclusive-or). Currently probably naive, using gets. A version with -puts that can overlap communication and the computation of the -reduction operation should be more scalable. However, the code is -rather compact and all ops use the same template. A future version of -\openshmem may add user-defined reductions, and in fact the framework -for this is already in place: all that is needed is a specification of -the SHMEM API. - -\subsection{Address and PE Accessibility} - -\openshmem allows us to test whether PEs are currently reachable, and -whether addresses on remote PEs are addressable. GASNet is used to -``ping'' the remote PE and then we wait for an ``ack'' with a -configurable timeout. Remains to be seen how useful this is, and -whether it can be used for future fault tolerance issues. - -\subsection{Tracing Facility} - -This library contains \textquotedblleft{}trace -points\textquotedblright{} with categorized messages. These are listed -in section \ref{sec:Environment-Variables} - -A high-resolution clock is maintained to timestamp such messages. 
-Numerically sorting the output on the first field can thus help -understand the order in which events happened. - -\subsection{C++} - -The C++ interface is basically the C one. There is one point of -contention, namely complex numbers. The SGI documentation refers only -to the use of C99~\cite{c99} ``complex'' modifiers, not to C++'s -\texttt{complex}. The use of complex number routines -(e.g.\ reductions) in C++ is thus not clearly specified. - -\subsection{Fortran} - -The Fortran interface is very similar to that of C. The names of -various routines are different to accommodate the various type -differences, e.g.\ \texttt{shmem\_integer\_put()} instead of -\texttt{shmem\_int\_put()}. - -The biggest difference is in the symmetric memory management routines. -These have completely different names and parameters compared to the C -interface. - -The \openshmem implementation handles Fortran with a very thin wrapper -on top of C. Mostly this involves catching Fortran's pass-by-reference -variables and dereferencing them in the underlying C call. - -The main development has been on a CentOS platform with GNU -4.1.2-redhat. There seem to be some issues with this compilers' -handling of cray-pointers: even the simplest programs (no \openshmem -content at all) produce a segmentation fault. Later versions -(4.5.0 and newer) behave better. - -\section{Undefined Behavior} - -Many routines are currently specified only in terms of ``correct'' -behavior. What happens when something goes wrong is not always -specified. This section attempts to set out a few of these scenarios -\begin{itemize} -\item put to PE out of range: suppose we do a put to ``right -neighbor'' (\(pe + 1\)). The highest-numbered PE will attempt to -communicate with a PE that does not exist. -\item library not initialized: virtually all \openshmem routines will -have major problems if the library has not been -initialized. Implementations can handle this situation in different -ways. 
-\end{itemize} - -\section{Environment Variables\label{sec:Environment-Variables}} - -The behavior of the \openshmem library can be controlled via a number -of environment variables. For SGI compatibility reasons, we support -the ``SMA'' variables: - -% force the SGI table now -\clearpage - -\begin{table}[!h] - \begin{center} - \caption{SGI Environment Variables} - \begin{tabular}{|l|l|} - \hline - Variable & Function\tabularnewline - \hline - \hline - \texttt{SMA\_VERSION} & print the library version at start-up\tabularnewline - \hline - \texttt{SMA\_INFO} & print helpful text about all these environment variables\tabularnewline - \hline - \texttt{SMA\_SYMMETRIC\_SIZE} & number of bytes to allocate for symmetric heap\tabularnewline - \hline - \texttt{SMA\_DEBUG} & enable debugging messages\tabularnewline - \hline - \end{tabular} - \end{center} -\end{table} - -and our own new ones: - -\begin{description} -\item[{\texttt{SHMEM\_LOG\_LEVELS}:}] a comma, space, or semi-colon separated - list of logging/trace facilities to enable debugging messages. 
The - facilities currently include the case-insensitive names: - - \begin{table}[!h] - \begin{center} - \caption{Logging Facility Names} - \begin{tabular}{|l|l|} - \hline - Facility & Meaning\tabularnewline - \hline - \hline - FATAL & something unrecoverable happened, abort\tabularnewline - \hline - - DEBUG & used for debugging purposes\tabularnewline - \hline - INFO & something interesting happened\tabularnewline - \hline - SYMBOLS & to inspect the symbol table information\tabularnewline - \hline - VERSION & about the library version\tabularnewline - \hline - - INIT & set-up of the program\tabularnewline - \hline - FINALIZE & tear-down of the program\tabularnewline - \hline - NOTICE & important event, but non-fatal (see below)\tabularnewline - \hline - AUTH & when something is attempted but not allowed\tabularnewline - \hline - MEMORY & symmetric memory information\tabularnewline - \hline - CACHE & cache flushing operations\tabularnewline - \hline - BARRIER & about barrier operations\tabularnewline - \hline - BROADCAST & about broadcast operation\tabularnewline - \hline - COLLECT & about collect and fcollect operation\tabularnewline - \hline - QUIET & tracing network quiet events\tabularnewline - \hline - FENCE & tracing network fence events\tabularnewline - \hline - REDUCTION & about reduction operations\tabularnewline - \hline - LOCK & related to setting, testing and clearing locks\tabularnewline - \hline - SERVICE & related to the network service thread\tabularnewline - \hline - PROFILING & for the PSHMEM profiling interface\tabularnewline - \hline - MODULES & loadable modules for different implementations of routines\tabularnewline - \hline - \end{tabular} - \end{center} - \end{table} - -\item [{\texttt{SHMEM\_LOG\_FILE}:}] a filename to which to write log - messages. All PEs append to this file. The default is for all PEs to - write to standard error. Per-PE log files might be an interesting - addition. 
-\item [{\texttt{SHMEM\_SYMMETRIC\_HEAP\_SIZE}:}] the number of bytes - to allocate for the symmetric heap area. Can scale units with ``K'', - ``M'' etc. modifiers. The default is 2G. -\item [{\texttt{SHMEM\_BARRIER\_ALGORITHM}:}] the version of the - barrier to use. The default is ``naive''. Designed to allow people - to plug other variants in easily and test. -\item [{\texttt{SHMEM\_BARRIER\_ALL\_ALGORITHM}:}] as for - \texttt{SHMEM\_BARRIER\_ALGORITHM}, but separating these two allows us - to optimize if e.g.\ hardware has special support for global barriers. -\item [{\texttt{SHMEM\_PE\_ACCESSIBLE\_TIMEOUT}:}] the number of - seconds to wait for PEs to reply to accessiblity checks. The default - is 1.0 (i.e.\ may be fractional). Currently not used. -\end{description} - -\section{Alternate collective algorithms} - -A module system coupled with the above environment variables allows -for runtime decisions to be made about which algorithm should be used -for different collective routines. These are installed as dynamic -objects and the selected algorithm is then loaded as needed. Each -module defines a structure that maps the interface it provides to its -routines. The choice of algorithm can also be steered through an -optional configuration file (overridden by the environment variables). 
-The file is - -\begin{minipage}{\linewidth} -\begin{lstlisting}[caption={Configuration File Location}] -/lib/modules/config -\end{lstlisting} -\end{minipage} - -and has a simple - -\begin{minipage}{\linewidth} -\begin{lstlisting}[caption={Configuration File Format}] -algorithm = implementation -\end{lstlisting} -\end{minipage} - -format, e.g.\ - -\begin{minipage}{\linewidth} -\begin{lstlisting}[caption={Configuration File Example}] -default = tree -barrier-all = bruck -\end{lstlisting} -\end{minipage} - -\section{Compiling and Running Programs} - -\subsection{SGI SHMEM} - -SHMEM for the SGI Altix family of machines is provided as part of the -Message-Passing Toolkit (MPT) in the ProPack~\cite{ProPack} -suite. Compilation uses a standard C, C++ or Fortran compiler -(e.g.\ GNU~\cite{gcc} or Intel~\cite{intelcomp} suites) and links -against the SMA and MPI libraries: - -\begin{lstlisting}[caption={Compiling and Linking SGI SHMEM}] -$ gcc -o prog prog.c -lsma -lmpi -\end{lstlisting} - -and a compiled program (C, C++ and Fortran) is run like this via the -MPI launcher: - -\begin{lstlisting}[label=oshrun,caption={Running a program on 8 processors with the SGI SHMEM Implementation}] -$ mpirun -np 8 ./prog -\end{lstlisting} - -\subsection{\openshmem} - -In order to abstract the compilation and launching process for -\openshmem we have provided wrapper programs in the reference -implementation: - -\begin{table}[h] - \begin{center} - \caption{Wrapper Programs} - \begin{tabular}{| l | l |} - \hline - \texttt{oshcc} & compile and link C programs \tabularnewline - \hline - \texttt{oshCC} & compile and link C++ programs \tabularnewline - \hline - \texttt{oshfort} & compile and link F77/F90 programs \tabularnewline - \hline - \texttt{oshrun} & to launch programs \tabularnewline - \hline - \end{tabular} - \end{center} -\end{table} - -The similarity to the style of wrappers found in many MPI -implementations is obvious and intentional. 
- -So to compile with the reference implementation, the wrapper program -can be used as in listing~\ref{oshcc} to compile and link a C program: - -\begin{lstlisting}[numbers=none,label=oshcc,caption={Compiling and Linking C with the \openshmem Reference Implementation}] -$ oshcc -O3 -o prog prog.c -\end{lstlisting} - -and a compiled program (C, C++ and Fortran) is run like this: - -\begin{lstlisting}[label=oshrun,caption={Running a program on 8 processors with the \openshmem Reference Implementation}] -$ oshrun -np 8 ./prog -\end{lstlisting} - -\subsection*{Note to Implementers} - -There is \emph{no} requirement in \openshmem to provide wrapper -programs like this, it's merely a convenience that helped during the -development of the reference implementation. It does help -implementations hide the details from programmers to simplify use of -\openshmem, though. - -\section{Configuration and Installation} - -There is a top-level \texttt{configure} script that is a simplified -version of the GNU autotools. This script will eventually become the -GNU setup and will do lots more feature tests. So the usual procedure -applies: - -\begin{lstlisting} -$ /path/to/source/configure [--options...] -$ make -$ make install -\end{lstlisting} - -The \texttt{configure} script accepts a -% make sure we get a real double-dash -\texttt{-{}-help} option that lists all the various settings. - -\subsection*{Note} -You must run \texttt{configure} from a separate build directory, not -from the source tree. - -\section{Accessing \openshmem Headers and Libraries} - -There is a useful tool called \texttt{pkg-config}~\cite{pkg-config} -that abstracts access to components of the installation. With -\texttt{pkg-config} you can ask \openshmem where its header files are, -and which libraries it needs to link programs. - -E.g. 
- -\begin{lstlisting}[numbers=none,label=pkg-config-cflags,caption={Finding the \openshmem header directory}] -$ pkg-config --cflags openshmem --I/opt/openshmem/include -\end{lstlisting} - -\begin{lstlisting}[numbers=none,label=pkg-config-libs,caption={Finding the \openshmem libraries}] -$ pkg-config --libs openshmem --Wl,-rpath,/opt/openshmem/lib -L/opt/openshmem/lib -L/opt/gasnet/fast/lib \ - -L/usr/lib64/gcc/x86_64-suse-linux/4.6 -lopenshmem -lgasnet-ibv-par -libverbs \ - -lpthread -lrt -lgcc -lm -\end{lstlisting} - -You can then integrate these settings into your own compilation and linking commands, e.g. - -\begin{lstlisting}[numbers=none,label=pkg-config-usage,caption={Compiling with pkg-config}] -$ gcc -c $(pkg-config --cflags openshmem) prog1.c -$ gcc -c $(pkg-config --cflags openshmem) prog2.c -$ gcc -o prog prog1.o prog2.o $(pkg-config --libs openshmem) -\end{lstlisting} diff --git a/_deprecated_sources/openshmem-book/thankyou.tex b/_deprecated_sources/openshmem-book/thankyou.tex deleted file mode 100644 index 90e945221..000000000 --- a/_deprecated_sources/openshmem-book/thankyou.tex +++ /dev/null @@ -1,39 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. 
-% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - -\chapter*{Acknowledgements} -\addcontentsline{toc}{chapter}{Acknowledgements} - -I'd like to thank my agent, Odin, The Green Bay Packers, Elvis... diff --git a/_deprecated_sources/openshmem-book/titlepage.tex b/_deprecated_sources/openshmem-book/titlepage.tex deleted file mode 100644 index 455dfff96..000000000 --- a/_deprecated_sources/openshmem-book/titlepage.tex +++ /dev/null @@ -1,71 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. 
-% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-% - -\begin{titlepage} - -\begin{center} - -\includegraphics{OpenSHMEM_Pound} - -% Title -\HRule \\[1.0cm] -{\huge \bfseries\scshape The \openshmem -\vspace{0.1in} -Parallel Programming Library}\\[0.4cm] -\HRule \\[1.5cm] - -\begin{minipage}{0.4\textwidth} -\begin{flushleft} \large -Tony Curtis \\ -Barbara Chapman \\ -Steve Poole \\ -\end{flushleft} -\end{minipage} - -% \author{ -% Tony Curtis \and -% Barbara Chapman \and -% Swaroop Pophale \and -% Steve Poole \and -% Jeff Kuehn -% } - -\vfill - -% \large{\today} - -\end{center} - -\end{titlepage} diff --git a/_deprecated_sources/openshmem-book/v&v.tex b/_deprecated_sources/openshmem-book/v&v.tex deleted file mode 100644 index 9636a24f4..000000000 --- a/_deprecated_sources/openshmem-book/v&v.tex +++ /dev/null @@ -1,56 +0,0 @@ -% -% Copyright (c) 2011, 2012 -% University of Houston System and Oak Ridge National Laboratory. -% -% All rights reserved. -% -% Redistribution and use in source and binary forms, with or without -% modification, are permitted provided that the following conditions -% are met: -% -% o Redistributions of source code must retain the above copyright notice, -% this list of conditions and the following disclaimer. -% -% o Redistributions in binary form must reproduce the above copyright -% notice, this list of conditions and the following disclaimer in the -% documentation and/or other materials provided with the distribution. -% -% o Neither the name of the University of Houston System, Oak Ridge -% National Laboratory nor the names of its contributors may be used to -% endorse or promote products derived from this software without specific -% prior written permission. -% -% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -% ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -% LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -% A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -% HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -% SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -% TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -% PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -% LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -% NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -% SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -% - -\chapter{Validation and Verification of \openshmem Libraries} - -If it compiles, it works. Right? - -We want to talk about the essential features and behavior -of \openshmem and how to go about testing these. - -Include discussion of UH V \& V suite and what it does. Go through -the API and discuss how and why each feature being tested is in fact -being tested. What are the important things to test for each feature? -Look for edge-cases wherever possible to help future implementors. - -\begin{description} - -\item Verification is the process of determining whether an \openshmem - library complies with the specification. - -\item Validation is the process of determining whether an \openshmem - library has ``fitness of purpose''. - -\end{description} diff --git a/_deprecated_sources/osh.tex b/_deprecated_sources/osh.tex deleted file mode 100755 index 9f88eac4a..000000000 --- a/_deprecated_sources/osh.tex +++ /dev/null @@ -1,1023 +0,0 @@ -% Version as of April 8, 1997 - -% ---------------------------------------------------------------------- -% osh.tex inspired by mpi-macs.tex --- man page macros, -% discuss, missing, mpifunc macros -% -% ---------------------------------------------------------------------- - -% TeX if definitions. 
These are defined here so (a) there is one place to -% change the defaults and (b) so that they can be changed by reading a -% configuration file -% Control the use of color -\newif\ifusecolor -\usecolortrue -% Control whether changes are highlighted -\newif\ifshowchange -\showchangetrue -% Control whether deleted text is shown -\newif\ifshowdelete -\showdeletetrue -% Control the use of change "bars" (really markers). Note that -% both changetrue and changebarstrue must not be set -\newif\ifchangebars -\changebarsfalse -% Control whether tickets are indicated in the margins (or inline, when -% they occur in inner mode. Note that this has no effect if \showchangetrue -% is not set -\newif\ifshowtickets -\showticketstrue -% For publisher's additions in the printed book -\newif\ifbookprinting -\bookprintingfalse - -% -% There are a number of features that are controlled by LaTeX if commands. -% This step allows you to control these through a configuration file -% This file should contain valid LaTeX commands, including comments (using -% the standard % character for comments) -% -% Known commands include: -% \showchangetrue - Show changes/additions (from a previous version) -% \showdeletetrue - Show deleted text (deleted from a previous version) -% \changebarstrue - Show change "bars" around changes (really begin/end -% markers in the output file) -% \usecolortrue - Use color to show changes -% -\newread\cfgin - -% -% General text color update macros -% These permit the use of nesting of the color changes, as well as -% a "do not change" option -% -\ifusecolor -\definecolor{orange}{rgb}{1,0.5,0} -\definecolor{purple}{rgb}{0.8,0,1} -\let\XA=\expandafter -% Definition for "Use current color" -\def\ColorSame{same} -% Create a stack that is 5 deep (no arrays in TeX) -\def\ColorS{black} -\def\ColorSi{same} -\def\ColorSii{same} -\def\ColorSiii{same} -\def\ColorSiiii{same} -% Stack pointer -\newcount\ColorStackP -\ColorStackP=0 -% Push a new color -\def\ColorPush#1{% 
-\global\advance\ColorStackP 1\relax% -\ifnum\ColorStackP>4\message{Font Color stack too deep}\fi% -\ifnum\ColorStackP=1\global\def\ColorSi{#1}\else -\ifnum\ColorStackP=2\global\def\ColorSii{#1}\else -\ifnum\ColorStackP=3\global\def\ColorSiii{#1}\else -\ifnum\ColorStackP=4\global\def\ColorSiiii{#1}% -\fi -\fi -\fi -\fi -\def\ColorCur{#1}% -\ifx \ColorCur\ColorSame \relax\else \color{#1}\fi -} -% Pop that color -\def\ColorPop{\global\advance\ColorStackP -1\relax% -\ifnum\ColorStackP<0\message{Font Color stack < 0}\fi% -\ifnum\ColorStackP=0\def\ColorCur{\ColorS}\else -\ifnum\ColorStackP=1\def\ColorCur{\ColorSi}\else -\ifnum\ColorStackP=2\def\ColorCur{\ColorSii}\else -\ifnum\ColorStackP=3\def\ColorCur{\ColorSiii}\else -\ifnum\ColorStackP=4\def\ColorCur{\ColorSiiii}% -\fi -\fi -\fi -\fi -\fi%\typeout{cur = \ColorCur and same = \ColorSame}% -\ifx\ColorCur\ColorSame\relax\else\color{\ColorCur}\fi% -} -% A synonym for color that is controlled by the \usecolortrue command -\def\Color#1{\color{#1}} -\else -\def\ColorPush#1{} -\def\ColorPop{} -\def\Color#1{} -\fi % \ifusecolor - -\ifshowdelete -\def\nocomment{\catcode`\%=9} -\def\restorecomment{\catcode`\%=14} -\else -\let\nocomment=\relax -\let\restorecomment=\relax -\fi - -\ifchangebars -\def\BegChange{\begchange} -\def\EndChange{\endchange} -\else -\let\BegChange=\relax -\let\EndChange=\relax -\fi - -\ifshowchange -% -% Use margin par for the ticket number unless marginpar won't work, in -% which case inline the ticket number. To do more would require special -% code for the TeX output routine, which isn't worth it for what we need -% here. 
-% Note that the changebars and the ticket both use marginpar, and if both -% are used at the same time, LaTeX may run out of floats -\ifchangebars -\def\ticket#1{\relax} -\else -\ifshowtickets -\def\ticket#1{\ifinner[ticket#1.]\else\protect\marginpar[\mbox{\hbox to \marginparwidth{\hss ticket#1.\hspace{30pt}}}]{\hbox to \marginparwidth{\hspace{30pt}ticket#1.\hss}}\fi} -\else -\def\ticket#1{\relax} -\fi % showtickets -\fi % changebars -\fi - -% fancyvrb defines Verbatim, which is a slightly better Verbatim environment -% Regrettably, the key feature needed, commandchars, does not work correctly -% in our environment (it doesn't accept arbitrary grouping characters). -% See README-2.2 for instructions on using Verbatim along with the above -% update macros. -%\usepackage{fancyvrb} - - -\def\snir{\relax} -\def\rins{\relax} - - -% To make the changes without showing the location or old source: -% \newcommand{\CHANGE}[2]{} -% \newcommand{\INTO}[1]{#1} -% \newcommand{\ADD}[2]{#2} - % \newcommand{\DELETE}[2]{} - - -% available: red green blue cyan yellow magenta - \def\RVWCAP/{} % shortcut for Review item 23.a - capitalization of titles (in \section} -% \def\RVWcap/{\mpiiidotiMergeFromREVIEWbegin{23.a}} % shortcut for Review item 23.a - capitalization of titles -\def\RVWcap/{} % shortcut for Review item 23.a - capitalization of titles - -\def\OnlyForAutomaticAnnexGeneration#1{}% deleting the content were the macro is used; but preserving it for the Annex - - -% This macro enables that all "_" (underscore) characters in the pfd -% file are searchable, and that cut&paste will copy the "_" as underscore. -% Without the following macro, the \_ is treated in searches and cut&paste -% as a " " (space character). -% This macro does not modify the behavior of _ in math or in verbatim -% environments. In verbatim environments, the "_" is always treated -% as a searchable character. 
-% -\DeclareRobustCommand{\_}{\texttt{\char`\_}} -% - -% From MPI-2.0 -% ------------ - -% Place some penalty for doing the break -% The penalty for a ``\gb'' should be greater than a \hyphenpenalty. -% \hyphenpenalty is 50 in plain.tex. -\def\gb{\penalty10000\hskip 0pt plus 8em\penalty4800\hskip 0pt plus-8em% -\penalty10000} - -% A theorem-like environment for code Examples (S. Otto) see Lamport, pg 58 -% -% Note that because we use a theorem environment that resets the counter -% with every chapter, pdflatex will issue a warning for each example that -% has the same number as an example in another chapter. This is too hard -% to fix (the easist way is to not use the theorem environment and roll -% a custom environment, including adding the necessary low-level commands -% for the pdf link support. -%\newtheorem{example}{Example}[chapter] -% Theorems have \em text; we want \rm. The easiest way to fix this, -% since we are not using Theorems, is to change the @begintheorem macro -\makeatletter -\def\@begintheorem#1#2{\rm \trivlist \item[\hskip \labelsep{\bf #1\ #2}]} -\makeatother -% Use \exindex{MPI\_FUNC} to generate an index entry for MPI -% functions/constants -%\newcommand{\exindex}[1]{\relax} -%\newcommand{\exindex}[1]{\index{EXAMPLES:#1}} - -% a couple of commands from Marc Snir, modified S. Otto - -%\newlength{\discussSpace} -%\setlength{\discussSpace}{.4cm} - -%\newenvironment{funcdef}[1]{ -% \vspace{\codeSpace} -% \noindent -% \samepage -% \hangindent 7em\hangafter=1 -% {\funcNoIndex{{\prefix}#1}}\mpifuncmainindex{#1} -% \MPIfunclist -%}{\end{list} \vspace{\codeSpace}} - -%\newcommand{\function}[1]{{\raggedright \hangindent 7em\hangafter=1\tt #1 \par \vspace{0.1in}}} -%\newcommand{\CM}{Communication Middleware} -% Watermark -% -% For release please remove waternark and lino -% DRAFT !!! 
-%\usepackage{draftwatermark} - -% max depth for table of content -\setcounter{tocdepth}{4} -% set path to figures -\graphicspath{{./figures/}} - -\usepackage{xspace} -%\sloppy -%\newcommand{\openshmem} {\mbox{OpenSHMEM}\xspace % \textsuperscript{{\small \texttrademark}} -%} -\newcommand{\openshmem} {{Open\-SHMEM}\xspace} -\newcommand{\insertDocVersion}{1.2} -\newcommand{\deprecate}[1]{\textcolor{Gray}{#1 (deprecated)}} -%\renewcommand\linenumberfont{\normalfont\scriptsize\sffamily} - -\hypersetup{pdftitle={\openshmem Specification Draft}, - pdfauthor={HPC Tools, University of Houston}, - pdfkeywords={Specification, Draft, \openshmem, SHMEM, PGAS, Partitioned, Global, Address, Gasnet, Parallel}} - -\definecolor{gray}{rgb}{0.92,0.92,0.92} -\lstset{ % - breakatwhitespace=false, % sets if automatic breaks should only happen at whitespace - basicstyle=\ttfamily\footnotesize, - %identifierstyle=\ttfamily\itshape, - breaklines=true, % sets automatic line breaking - escapeinside={\%*}{*)}, % if you want to add LaTeX within your code - extendedchars=true, % lets you use non-ASCII characters; for 8-bits encodings only, does not work with UTF-8 - %frame=single, % adds a frame around the code - keepspaces=true, % keeps spaces in text, useful for keeping indentation of code (possibly needs columns=flexible) - %keywordstyle=\color{blue}, % keyword style - %language=Octave, % the language of the code - morekeywords={*,...}, % if you want to add more keywords to the set - % numbers=left, % where to put the line-numbers; possible values are (none, left, right) - %numbersep=5pt, % how far the line-numbers are from the code - showspaces=false, % show spaces everywhere adding particular underscores; it overrides 'showstringspaces' - showstringspaces=false, % underline spaces within strings only - showtabs=false, % show tabs within strings adding particular underscores - %tabsize=2, % sets default tabsize to 2 spaces - %title=\lstname, % show the filename of files included with 
\lstinputlisting; also try caption instead of title -}\newcommand{\discuss}[1]{} - -% long discussion -\bgroup\catcode`\{=10\catcode`\}=10\catcode`\[=1\catcode`\]=2\long\gdef\Eatdiscussion#1end{discussion}[\relax\end[discussion]]\relax\egroup\newenvironment{discussion}{\bgroup\def\do##1{\catcode`##1=10}\dospecials\Eatdiscussion}{\egroup} - -\newcommand{\missing}[1]{} - -\newcommand{\alter}[1]{} - -\newcommand{\status}[1]{} - -% special comment command for last round -%\newcommand{\question}[1]{\vspace{\discussSpace} {\small {\bf Question:} #1} \vspace{\discussSpace}} -\newcommand{\question}[1]{} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% Use this to make pages start on the right side -%\newcommand{\startchap}[0]{\relax} - -%\newlength{\codeSpace} -%\setlength{\codeSpace}{.4cm} - - -\def\RMA/{\textsf{RMA}} % RMA macro -- see TEX Book, pg 8,204 bloody TEX!! - -\newcommand{\uu}[1]{\underline{\hyperpage{#1}}} -\newcommand{\typedefindex}[1]{\index{TYPEDEF:#1}} -\newcommand{\cdeclindex}[1]{\index{CONST:#1}} % for index entry of C declarations like MPI_Comm -\newcommand{\cdeclmainindex}[1]{\index{CONST:#1|uu}} - -\def\MPIfunclist{ - \begin{list}{}{ % see pg 113 of Lamport's book - \setlength{\leftmargin}{105pt} - \setlength{\labelwidth}{80pt} - \setlength{\labelsep}{10pt} - \setlength{\itemindent}{0pt} - \setlength{\itemsep}{0pt} - \setlength{\topsep}{2pt} - } -} - -\newlength{\codeSpace} -\setlength{\codeSpace}{.4cm} - -\newcommand{\IN}[0]{{\small IN}} -\newcommand{\OUT}[0]{{\small OUT}} -\newcommand{\INOUT}[0]{{\small INOUT}} -\newcommand{\prefix}[0]{{LLC\_}} - -\newcommand{\mpiarg}[1]{\gb\textsf{#1}} - -\newcommand{\funcarg}[3]{\item[\hbox to 30pt{\textsf{#1} \hfill} \mpiarg{#2}\hfill]{\small #3}} - -\def\gb{\penalty10000\hskip 0pt plus 8em\penalty4800\hskip 0pt plus-8em% -\penalty10000} -\newcommand{\funcNoIndex}[1]{\gb\textsf{#1}} -\newcommand{\mpifuncmainindex}[1]{\index{#1|uu}} - -% special for functions that you don't want listed in index. 
-% This was added for showing corrections to functions already listed. -\newenvironment{funcdefnolist}[1]{ - \vspace{\codeSpace} - \vspace{\codeSpace} - \noindent - \samepage - {\func{#1}} - \begin{list}{}{ % see pg 113 of Lamport's book - \setlength{\leftmargin}{200pt} - \setlength{\labelwidth}{180pt} - \setlength{\labelsep}{10pt} - \setlength{\itemindent}{0pt} - \setlength{\itemsep}{0pt} - \setlength{\topsep}{5pt} - } -}{\end{list} \vspace{\codeSpace}} - - -\newenvironment{ffuncdef}[1]{ - \vspace{\codeSpace} - \noindent - Fortran binding: - - \noindent - \samepage - {\ffunc{#1}} - \begin{list}{}{ % see pg 113 of Lamport's book - \setlength{\leftmargin}{200pt} - \setlength{\labelwidth}{180pt} - \setlength{\labelsep}{10pt} - \setlength{\itemindent}{0pt} - \setlength{\itemsep}{0pt} - \setlength{\topsep}{5pt} - } -}{\end{list} \vspace{\codeSpace}} - - % see page 77, the TeX book. -\newcommand{\cfunc}[1]{\gb\textsf{#1}} -\newcommand{\ffunc}[1]{\gb\textsf{#1}} -\newcommand{\const}[1]{\protect\gb\protect{\textsf{\small #1}}\index{CONST:#1}} -\newcommand{\constskip}[1]{\protect\gb\protect{\textsf{\small #1}}} -% for ones from MPI-1 -\newcommand{\consti}[1]{\protect\gb\protect{\textsf{\small #1}}\index{CONST:#1}} % constants/handles - language independent -%\newcommand{\consti}[1]{\protect\gb\protect{\small\sf #1}\index{CONST:#1}} % constants/handles - language independent -\newcommand{\constiskip}[1]{\protect\gb\protect{\textsf{\small #1}}} % ... 
same, but not in the Constant Index -\newcommand{\constitemtwo}[3]{\item[\const{#1}, \const{#2}\hfill]{#3}} -\newcommand{\constitemthree}[4]{\item[\const{#1}, \const{#2}, \const{#3}\hfill]{#4}} -% for ones that don't go in index -\newcommand{\constskipitem}[2]{\item[\constskip{#1}\hfill]{#2}} -% \newcommand{\carg}[1]{\gb\textsf{#1}} % currently not used -% \newcommand{\farg}[1]{\gb\textsf{#1}} % currently not used -\newcommand{\type}[1]{\gb\textsf{#1}\index{CONST:#1}} % datatype handles -% -\newcommand{\gtype}[1]{\textsf{#1}} % generic (language independent) type -\newcommand{\shorttype}[1]{\textsf{#1}\index{CONST:#1}} % ... same but without \gb panelty -\newcommand{\ctype}[1]{\gb\texttt{#1}} % - and corresponding C type -\newcommand{\ftype}[1]{\gb\texttt{#1}} % - and corresponding Fortran type -% -% Info is for MPI_Info predefined strings. \infokey{keyname} and -% \infoval{keyvaluename} -\newcommand{\info}[1]{\protect\gb\protect{\small\sf #1}\index{CONST:#1}} -\newcommand{\infoval}[1]{\protect\gb\protect{\small\sf #1}\index{CONST:#1}} -\let\infokey=\infoval -\newcommand{\infoskip}[1]{\protect\gb\protect{\small\sf #1}} -% -\newcommand{\error}[1]{\protect\gb\protect{\small\sf #1}\index{CONST:#1}} -\newcommand{\errorskip}[1]{\protect\gb\protect{\small\sf #1}} -\newcommand{\errori}[1]{\protect\gb\protect{\small\sf #1}} - - -\def\class{$\langle$CLASS$\rangle$} - -\newenvironment{constlist}[0]{ - \vspace{\codeSpace} - \noindent - \begin{list}{}{ % see pg 113 of Lamport's book - \setlength{\leftmargin}{200pt} - \setlength{\labelwidth}{190pt} - \setlength{\labelsep}{10pt} - \setlength{\itemindent}{10pt} - \setlength{\itemsep}{-5pt} - \setlength{\topsep}{-5pt} - } -}{\end{list} \vspace{\codeSpace}} - -% some commands from Bill Gropp - -\def\code#1{\texttt{#1}} -\def\setmargin#1{\begingroup\leftmargin #1 \advance\leftmargin\labelsep - \leftmargini #1 \advance\leftmargini\labelsep} -\def\esetmargin{\endgroup} -\def\ibamount{3.0cm\relax} -\def\ibaamount{4.0cm} 
-\def\ibdamount{4.5cm} -\def\ibcamount{2.0cm} -\def\ib#1{\hbox to \ibamount{#1\hfil}} -\def\iba#1{\hbox to \ibaamount{#1\hfil}} -\def\ibd#1{\hbox to \ibdamount{#1\hfil}} -\def\ibc#1{\hbox to \ibcamount{#1\hfil}} - -% Use \code{...} for code fragments -%\def\code#1{\texttt{#1}} -% Use \df{name} for a definition of name in the text -\def\df#1{{\bf #1}} -% Use \note{text} for marginal notes -\def\note#1{\marginpar{\bf #1}} - -% -% Get line numbers in the gutters. Thanks to Guy Steele and HPFF! -% - -\makeatletter -% -% This is used to put line numbers on plain pages. Used in draft.tex -% -\def\withlinenumbers{\relax - \def\@evenfoot{\hbox to 0pt{\hss\LineNumberRuler\hskip 1.5pc}\hfil}\relax - \def\@oddfoot{\hfil\hbox to 0pt{\hskip 1.5pc\LineNumberRuler\hss}}} - -\def\LineNumberRuler{\vbox to 0pt{\vss\normalsize \baselineskip13.6pt - \lineskip 1pt \normallineskip 1pt \def\baselinestretch{1}\relax - \LNR{1}\LNR{2}\LNR{3}\LNR{4}\LNR{5}\LNR{6}\LNR{7}\LNR{8}\LNR{9} - \LNR{10}\LNR{11}\LNR{12}\LNR{13}\LNR{14} - \LNR{15}\LNR{16}\LNR{17}\LNR{18}\LNR{19} - \LNR{20}\LNR{21}\LNR{22}\LNR{23}\LNR{24} - \LNR{25}\LNR{26}\LNR{27}\LNR{28}\LNR{29} - \LNR{30}\LNR{31}\LNR{32}\LNR{33}\LNR{34}\LNR{35} - \LNR{36}\LNR{37}\LNR{38}\LNR{39} - \LNR{40}\LNR{41}\LNR{42}\LNR{43}\LNR{44} - \LNR{45}\LNR{46}\LNR{47}\LNR{48} - \vskip 31pt}} -\def\LNR#1{\hbox to 1pc{\hfil\tiny#1\hfil}} - -% jmm; merge the withlinenumbers stuff into -% the centered page numbers that tex defines by default -\def\ps@plainwithlinenumbers{\let\@mkboth\@gobbletwo - \def\@oddhead{} - \def\@oddfoot{\hfil\rm\thepage\hfil - \hbox to 0pt{\hskip 1.5pc\LineNumberRuler\hss}} - \def\@evenhead{} - \def\@evenfoot{\hbox to 0pt{\hss - \LineNumberRuler\hskip 1.5pc}\rm\hfil\thepage\hfil}} - -% The old version; uncommenting the withlinenumbers part didn't -% work because that macro replaced the footer definition that -% did page numbering. 
-%\def\ps@plainwithlinenumbers{\ps@plain}%\withlinenumbers} -% end jmm changes - -% -% 1st page of a chapter has its own page style, so we have to put line -% numbers in here also. -% -\newwrite\chappages -\immediate\openout\chappages=chappage.txt -\def\writespace{ } -% -% Contents is done with \chapter*{Contents}, so we need to turn off the -% line numbers in this case. Easiest to look at def -% -\def\incontents{0} -\newif\ifcontents -\contentsfalse -\def\chapter{\clearpage \ifcontents\else\thispagestyle{plainwithlinenumbers}\fi - \write\chappages{Chapter \thechapter\writespace - \the\count0} - \global\@topnum\z@ \@afterindentfalse \secdef\@chapter\@schapter} - -% -% Change "Chapter" to "Chapter", "Appendix" to "Annex" -% -\renewcommand{\chaptername}{Chapter} -\renewcommand{\appendixname}{Annex} -% ... old code does not work correctly with pdflatex -% \def\@chapapp{Chapter} -% \def\appendix{\par -% \setcounter{chapter}{0} -% \setcounter{section}{0} -% \def\@chapapp{Annex} -% \def\thechapter{\Alph{chapter}}} - -\makeatother - - -% -% Also from HPFF. These look potentially useful. -% - -\newenvironment{rationale}{\begin{list}{}{}\item[]{\it Rationale.} -}{{\rm ({\it End of rationale.})} \end{list}} - -\newenvironment{implementors}{\begin{list}{}{}\item[]{\it Advice - to implementors.} -}{{\rm ({\it End of advice to implementors.})} \end{list}} - -\newenvironment{users}{\begin{list}{}{}\item[]{\it Advice to users.} -}{{\rm ({\it End of advice to users.})} \end{list}} - - - -% -% Use Sans Serif font for sections, etc. S. 
Otto -% -\makeatletter -\def\section{\@startsection {section}{1}{\z@}{-3.5ex plus -1ex minus --.2ex}{2.3ex plus .2ex}{\Large\sf}} -\def\subsection{\@startsection{subsection}{2}{\z@}{-3.25ex plus -1ex minus --.2ex}{1.5ex plus .2ex}{\large\sf}} -\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-3.25ex plus --1ex minus -.2ex}{1.5ex plus .2ex}{\normalsize\sf\bf}} -\def\paragraph{\@startsection {paragraph}{4}{\z@}{3.25ex plus 1ex -minus .2ex}{-1em}{\normalsize\sf}} -\makeatother -% -% An Editor's Note macro -% -\def\ednote#1{{\sl Editor's note: #1}} - -% a way to comment out large sections of text -\newcommand{\commentOut}[1]{{}} - -% -% A few commands to help in writing MPI man pages -% -\def\twoc#1#2{ -\begin{list} -{\hbox to95pt{#1\hfil}} -{\setlength{\leftmargin}{120pt} - \setlength{\labelwidth}{95pt} - \setlength{\labelsep}{0pt} - \setlength{\partopsep}{0pt} - \setlength{\parskip}{0pt} - \setlength{\topsep}{0pt} -} -\item -{#2} -\end{list} -} -\outer\long\def\onec#1{ -\begin{list} -{} -{\setlength{\leftmargin}{25pt} - \setlength{\labelwidth}{0pt} - \setlength{\labelsep}{0pt} - \setlength{\partopsep}{0pt} - \setlength{\parskip}{0pt} - \setlength{\topsep}{0pt} -} -\item -{#1} -\end{list} -} -\def\manhead#1{\noindent{\bf{#1}}} - -\makeatletter -% -% make our own index environment that can have a different -% title than just "Index" -- S. 
Otto -% -\def\@index{Index} -\def\@introtext{} % MPI-2.1 -\newif\if@restonecol -\def\myindex{\@restonecoltrue\if@twocolumn\@restonecolfalse\fi -\columnseprule \z@ -%\columnsep 35pt\twocolumn[\@makeschapterhead{Index}] - %\@mkboth{INDEX}{INDEX}\thispagestyle{plain}\parindent\z@ -%\columnsep 35pt\twocolumn[\@makeschapterhead{\@index}] -\columnsep 35pt\twocolumn[\@makeschapterhead{\@index}\@introtext\vspace{15pt}] % MPI-2.1 - \@mkboth{\@index}{\@index}\thispagestyle{plain}\parindent\z@ - \parskip\z@ plus .3pt\relax\let\item\@idxitem} -\def\@idxitem{\par\hangindent 40pt} -\def\subitem{\par\hangindent 40pt \hspace*{20pt}} -\def\subsubitem{\par\hangindent 40pt \hspace*{30pt}} -\def\endmyindex{\if@restonecol\onecolumn\else\clearpage\fi} -\def\indexspace{\par \vskip 10pt plus 5pt minus 3pt\relax} -\makeatother - -%macros for language binding: mpibind, mpifbind, and fargs: - - -\def\fargs{\\\advance\leftskip 2em} - -\raggedbottom - -% from binding chapter for appendix B. -% -*- latex -*- - -\makeatletter -\newbox\arg@box - -\def\separator{\rule{\linewidth}{0.5pt}} -\def\function#1{\texttt{#1}} -\def\variable#1{\texttt{#1}} - -\def\subtitle{\pagebreak[3]\@ifstar{\@subtit@star}{\@subtit@norm}} -\def\@subtit@star#1{ - \item[\hbox{\normalsize\sf\begin{tabular}[t]{l}#1\end{tabular}}\hfill] - \hfil\par - \expandafter{\let\par=\space\ignorespaces\let\par=\endgraf} -} -\def\@subtit@norm#1{ - \setbox\arg@box=\hbox{\normalsize\sf\begin{tabular}[t]{l}#1\end{tabular}} - \ifdim \wd\arg@box > \labelwidth \item[\copy\arg@box\hfill]\hfil\par - \else \dp\arg@box=0pt \item[\copy\arg@box\hfill] \fi - \expandafter{\let\par=\space\ignorespaces\let\par=\endgraf} -} - -\newenvironment{manpage}[3]{\@beginManpage#1\@@#2\@@#3\@@}{\@endManpage} - -\def\@beginManpage#1\@@#2\@@#3\@@{ - \addcontentsline{toc}{subsection}{#2} - \clearpage - \begin{list}{}{ - \setlength\labelwidth{1.2in} - \setlength\leftmargin{\labelwidth} - \addtolength\leftmargin{\labelsep} - \topsep 5pt plus 2pt minus 2pt - \itemsep 
5pt plus 2pt minus 2pt - \parsep 10pt plus 2pt minus 2pt - \raggedbottom - } - } - -\def\@endManpage{\end{list} \clearpage \flushbottom} - -\makeatother - -% The next set of macros define change marks for the document -%---------------------------------------------------------------------- -% intended for general change marks not associated with a certain version -%\def\begchange{\marginpar[\hspace*{-60pt}\mbox{\hspace*{10pt} -%$\top$ \tiny (General)}]{\mbox{$\top$ \tiny (General)}}} -%\def\endchange{\marginpar[\hspace*{-60pt}\mbox{\hspace*{10pt} -%$\bot$ \tiny (General)}]{\mbox{$\bot$ \tiny (General)}}} -% -% These versions are careful to generate no extraneous error messages -% about overfull boxes -% -% Because they use marginpar, they can't be used everywhere. If -% marginpar isn't available, they do *not* add any marks. -% -\def\begchange{\ifinner\else\protect\marginpar[\mbox{\hbox to - \marginparwidth{\hss\mbox{\hspace*{10pt}$\top$ \tiny - (Fin2)}\hspace*{30pt}}}]{\hbox to \marginparwidth{\mbox{$\top$ \tiny (Fin2)}\hss}}\fi} -\def\endchange{\ifinner\else\protect\marginpar[\mbox{\hbox to \marginparwidth{\hss\mbox{\hspace*{10pt}$\bot$ \tiny (Fin2)}\hspace*{30pt}}}]{\hbox to \marginparwidth{\mbox{$\bot$ \tiny (Fin2)}\hss}}\fi} -%get rid of these change marks -%\def\begchange{} -%\def\endchange{} - -%\def\begchange{\ifinner\else\protect\fi} -%\def\endchange{\ifinner\else\fi} - -% change marks for June draft -\def\begchangejune{} -\def\endchangejune{} - -% change marks for July draft -\def\begchangejuly{} -\def\endchangejuly{} - -% change marks for Sept draft -\def\begchangesept{} -\def\endchangesept{} - -% change marks for Oct draft -\def\begchangeoct{} -\def\endchangeoct{} - -% change marks for Dec draft - for RT -\def\begchangedec{} -\def\endchangedec{} - -% change marks for Jan draft -\def\begchangejan{} -\def\endchangejan{} - -% change marks for February draft - for RT -\def\begchangefeb{} -\def\endchangefeb{} - -% change marks for March draft 
-\def\begchangemar{} -\def\endchangemar{} -\def\begchangemarch{} -\def\endchangemarch{} - -% change marks for April draft -\def\begchangeapr{} -\def\endchangeapr{} - -% change marks for first final draft -\def\begchangefini{} -\def\endchangefini{} - -% change marks for second final draft -\def\begchangefinii{} -\def\endchangefinii{} - -%\def\begchangefiniii{\marginpar[\mbox{\hbox to -% \marginparwidth{\hss\mbox{\hspace*{10pt}$\top$ \tiny -% (Fin3)}\hspace*{30pt}}}]{\hbox to \marginparwidth{\mbox{$\top$ \tiny (Fin3)}\hss}}} -%\def\endchangefiniii{\marginpar[\mbox{\hbox to \marginparwidth{\hss\mbox{\hspace*{10pt}$\bot$ \tiny (Fin3)}\hspace*{30pt}}}]{\hbox to \marginparwidth{\mbox{$\bot$ \tiny (Fin3)}\hss}}} -%get rid of these change marks -\def\begchangefiniii{} -\def\endchangefiniii{} -%---------------------------------------------------------------------- -\newcommand{\startchap}[0]{%\cleardoublepage -} - -\newcommand{\OSH}{\emph{OpenSHMEM}} -\newcommand{\rcomment}[1] - {{\color{red}\textsf{#1}}} - -\newcommand{\bAPI}[2]{ -\subsubsection{\bf #1}%\hfill -#2 -\hfill %\\ -\begin{description} -%\par\nobreak\vspace{-\parskip} - \item[SYNOPSIS] \hfill \\ \\ - %\\ \\ -} - -\newcommand{\eAPI}{ -\end{description} -} - -\newcommand{\synC}{%\hfill \\ - %\vspace{-\parskip} \par - \textbf{C/C++:} - \begin{lstlisting} [language={C}, backgroundcolor=\color{gray}, lineskip=2pt, morekeywords={size_t}, aboveskip=0pt, belowskip=0pt] -} - -\newcommand{\synCE}{ -\end{lstlisting} -} - -\newcommand{\synFE}{ -\end{lstlisting} -} - -\newcommand{\synF}{%\hfill \\ - %\vspace{-\parskip} \par - \textbf{FORTRAN:} - \begin{lstlisting} [language={Fortran}, backgroundcolor=\color{gray}, lineskip=3pt, deletekeywords={TARGET,LEN}, aboveskip=0pt, belowskip=0pt] -} -\newcommand{\aC}[1] {\textit{#1}} -\newcommand{\sC}[1] {\textbf{#1}} -\newcommand{\aF}[1]{\textit{#1}} - - \newcommand{\desB}[3] {\hfill - \item[DESCRIPTION] \hfill %\\ - \begin{description} - \item[Arguments] \hfill \\ - #1 - \hfill %\\ 
\\ - \item[API description] \hfill \\ %\\ - #2 %\par - \hfill \\ %\\ - #3 - \hfill \\ %\\ - \end{description} - } -\newcommand{\argRow}[3] { - \begin{tabular}{p{2cm} p{2cm} p{10cm}} - \textbf{#1} & \textit{#2} & {#3} \\ - \end{tabular} -} -\newcommand{\desTB}[2] {#1 \\ \\ - \begin{tabular}{p{5cm} p{9cm}} - \hline - Routine & Data Type of dest and source\\ - \hline \tabularnewline - \end{tabular}\\ - #2 - %\hfill -} - -\newcommand{\desTBC}[4] {#1 \\ \\ - \begin{tabular}{p{5cm} p{9cm}} - \hline - #2 & #3\\ - \hline \tabularnewline - \end{tabular}\\ - #4 - \hfill -} - -\newcommand{\desR}[1] -{\hfill %\\ \\ - \item[Return Values] \hfill \\ %\\ - #1 - \\ -} - -\newcommand{\cRow}[2]{ - \begin{tabular}{p{5cm} p{9cm}} - #1 & #2 \tabularnewline - \end{tabular}\\ -} - -\newcommand{\notesB}[1]{\hfill %\\ \\ -\item[Notes] \hfill \\ %\\ -#1 -%\\ -} - -\newcommand{\notesImp}[1]{\hfill %\\ \\ -\item[Note to implementors] \hfill \\ %\\ -#1 -%\\ -} - -\newcommand{\exampleB}[1] { -\item[EXAMPLES] \hfill \\ \\ - #1 -} - -\newcommand{\exampleITEM}[3] { - #1 - \lstinputlisting[language={C}, tabsize=2, basicstyle=\ttfamily\footnotesize, morekeywords={size_t}] {#2} - #3 -} - -\newcommand{\exampleITEMF}[3] { - #1 - \lstinputlisting[language={Fortran}, tabsize=2, basicstyle=\ttfamily\footnotesize, deletekeywords={TARGET}] {#2} - #3 -} - -\newcommand{\source}{\textit{source}} -\newcommand{\target}{\textit{target}} -\newcommand{\PUT}{\textit{Put}} -\newcommand{\GET}{\textit{Get}} -\newcommand{\OPR}[1]{\textit{#1}} -\newcommand{\dest}{\textit{dest}} - - -%\newcommand{\FUNC}[1] {\texttt{#1}} -%\newcommand{\FUNC}[1] {{\bf \texttt{#1}}} -%\newcommand{\FUNC}[1] {\texttt{#1}} -\newcommand{\FUNC}[1] {\textit{#1}} - -\newcommand{\VAR}[1] {\textit{#1}} - -\newcommand{\CONST}[1] {\textit{#1}} - -\newcommand{\CorCpp}{\textit{C/C++}} - -\newcommand{\Fortran}{\textit{Fortran}} - -\newcommand{\Clang}{\textit{C}} - -\newcommand{\Cpp}{\textit{C++}} - -\newcommand{\barrier}{\FUNC{SHMEM\_BARRIER}} 
-\newcommand{\barrierall}{\FUNC{SHMEM\_BARRIER\_ALL}} -\newcommand{\broadcast}{\FUNC{SHMEM\_BROADCAST}} -\newcommand{\collect}{\FUNC{SHMEM\_COLLECT}} -\newcommand{\reduction}{\textit{Reduction Operations}} -\newcommand{\activeset}{\textit{Active~set}} -\newcommand{\shmemprefix}{\textit{SHMEM\_}} -\newcommand{\shmemprefixC}{\textit{\_SHMEM\_}} - -\def\StandardListing { - \lstset { -%% basicstyle=\scriptsize\ttfamily, -%% backgroundcolor=\color{ListingBG}, -%% showspaces=false, -%% showstringspaces=false, -%% showtabs=false, -%% frame=tlBR, -%% frameround=tttt, -%% numbers=none, -%% caption=\lstname - breakatwhitespace=false, % sets if automatic breaks should only happen at whitespace - basicstyle=\ttfamily\footnotesize, - breaklines=true, % sets automatic line breaking - escapeinside={\%*}{*)}, % if you want to add LaTeX within your code - extendedchars=true, % lets you use non-ASCII characters; for 8-bits encodings only, does not work with UTF-8 - keepspaces=true, % keeps spaces in text, useful for keeping indentation of code (possibly needs columns=flexible) - morekeywords={*,...}, % if you want to add more keywords to the set - showspaces=false, % show spaces everywhere adding particular underscores; it overrides 'showstringspaces' - showstringspaces=false, % underline spaces within strings only - showtabs=false, % show tabs within strings adding particular underscores - backgroundcolor=\color{gray}, - } -} - -% annotated program source should be line numbered though - -\def\ProgramNumberedListing { - \StandardListing - \lstset { - numbers=left, - numberstyle=\footnotesize - } -} - -% new command to show program listings - -\newcommand{\numberedlisting}[2] { - \ProgramNumberedListing - \lstinputlisting[#1]{#2} - \StandardListing -} - -\lstdefinelanguage{OSH+C}[]{C}{ - classoffset=1, - morekeywords={ - _SHMEM_BCAST_SYNC_SIZE, _SHMEM_SYNC_VALUE, - start_pes, - my_pe, _my_pe, shmem_my_pe, - num_pes, _num_pes, shmem_n_pes, - shmem_int_p, shmem_short_p, shmem_long_p, 
- shmem_int_put, shmem_short_put, shmem_long_put, - shmem_barrier_all, shmem_barrier, - shmalloc, shfree, shrealloc, - shmem_broadcast32, shmem_broadcast64, - shmem_short_inc, shmem_int_inc, shmem_long_inc, - shmem_short_add, shmem_int_add, shmem_long_add, - shmem_short_finc, shmem_int_finc, shmem_long_finc, - shmem_short_fadd, shmem_int_fadd, shmem_long_fadd, - shmem_set_lock, shmem_test_lock, shmem_clear_lock, - shmem_long_sum_to_all, - shmem_complexd_sum_to_all, - }, - keywordstyle=\color{black}\textbf, - classoffset=0, - sensitive=true -} - -\lstdefinelanguage{OSH2+C}[]{OSH+C}{ - classoffset=1, - morekeywords={ - shmem_init, - shmem_finalize, - shmem_malloc, - shmem_my_pe, - shmem_error, - shmem_global_exit, - }, - keywordstyle=\color{black}\textbf, - classoffset=0, - sensitive=true -} - -\lstdefinelanguage{OSH+F}[]{Fortran}{ - classoffset=1, - morekeywords={ - SHMEM_BCAST_SYNC_SIZE, SHMEM_SYNC_VALUE, - start_pes, - my_pe, shmem_my_pe, - num_pes, shmem_n_pes, - shmem_int_p, shmem_short_p, shmem_long_p, - shmem_int_put, shmem_short_put, shmem_long_put, - shmem_barrier_all, shmem_barrier, - shpalloc, shpdeallc, shpclmove, - shmem_broadcast32, shmem_broadcast64, - shmem_broadcast4, shmem_broadcast8, - shmem_short_inc, shmem_int_inc, shmem_long_inc, - shmem_short_add, shmem_int_add, shmem_long_add, - shmem_short_finc, shmem_int_finc, shmem_long_finc, - shmem_short_fadd, shmem_int_fadd, shmem_long_fadd, - shmem_set_lock, shmem_test_lock, shmem_clear_lock, - shmem_long_sum_to_all, - }, - keywordstyle=\color{black}\textbf, - classoffset=0, - sensitive=false -} - -\lstdefinelanguage{OSH2+F}[]{OSH+F}{ - classoffset=1, - morekeywords={ - shmem_init, - shmem_finalize, - shmem_malloc, - shmem_my_pe, - shmem_error, - shmem_global_exit, - }, - keywordstyle=\color{black}\textbf, - classoffset=0, - sensitive=true -} - -\newcommand{\outputlisting}[2] { -\begin{minipage}{\linewidth} -\vspace{0.1in} - \lstinputlisting[#1]{#2} - \StandardListing -\vspace{0.1in} -\end{minipage} -} 
- -\usepackage[normalem]{ulem} -\hyphenation{Open-SHMEM} diff --git a/content/atomics_intro.tex b/content/atomics_intro.tex index 911fe3adb..6945b2a3a 100644 --- a/content/atomics_intro.tex +++ b/content/atomics_intro.tex @@ -1,71 +1,117 @@ -An \ac{AMO} is a one-sided communication mechanism that combines memory update -operations with atomicity guarantees described in Section +An \ac{AMO} is a one-sided communication mechanism that combines memory read, +update, or write operations with atomicity guarantees described in Section~% \ref{subsec:amo_guarantees}. Similar to the \ac{RMA} routines, described in -Section \ref{sec:rma}, the \acp{AMO} are performed only on symmetric objects. -\openshmem{} defines the two types of \ac{AMO} routines: +Section~\ref{sec:rma}, the \acp{AMO} are performed only on symmetric objects. +\openshmem defines two types of \ac{AMO} routines: + \begin{itemize} -\item % Blocking\\ -The \textit{fetching} routines return the original value of, and optionally -update, the remote data object in a single atomic operation. The routines -return after the data has been fetched and delivered to the local \ac{PE}. -The \textit{fetching} operations include: \FUNC{SHMEM\_FETCH}, -\FUNC{SHMEM\_CSWAP}, \FUNC{SHMEM\_SWAP}, \FUNC{SHMEM\_FINC}, and \FUNC{SHMEM\_FADD}. +\item + The \emph{fetching} routines return the original value of, and optionally + update, the remote data object in a single atomic operation. The routines + return after the data has been fetched from the target \ac{PE} and delivered + to the calling \ac{PE}. + The data type of the returned value is the same as the type of + the remote data object. + + The fetching routines include: + \FUNC{shmem\_atomic\_\{fetch, compare\_swap, swap\}} and + \FUNC{shmem\_atomic\_fetch\_\{inc, add, and, or, xor\}}. + +\item + The \emph{non-fetching} routines update the remote data object in a single + atomic operation. 
A call to a non-fetching atomic routine issues the atomic + operation and may return before the operation executes on the target \ac{PE}. + The \FUNC{shmem\_quiet}, \FUNC{shmem\_barrier}, or \FUNC{shmem\_barrier\_all} + routines can be used to force completion for these non-fetching + atomic routines. + + The non-fetching routines include: + \FUNC{shmem\_atomic\_\{set, inc, add, and, or, xor\}}. + +\end{itemize} -\item % Non-Blocking\\ -The \textit{non-fetching} atomic routines update the remote memory in a single -atomic operation. A \textit{non-fetching} atomic routine starts the atomic -operation and may return before the operation execution on the remote \ac{PE}. -To force completion for these \textit{non-fetching} atomic routines, -\FUNC{shmem\_quiet}, \FUNC{shmem\_barrier}, or \FUNC{shmem\_barrier\_all} can be -used by an \openshmem{} program. +Where appropriate compiler support is available, \openshmem provides +type-generic \ac{AMO} interfaces via \Cstd[11] generic selection. +The type-generic support for the \ac{AMO} routines is as follows: -The \textit{non-fetching} operations include: \FUNC{SHMEM\_SET}, \FUNC{SHMEM\_INC} and -\FUNC{SHMEM\_ADD}. +\begin{itemize} +\item \FUNC{shmem\_atomic\_\{compare\_swap, fetch\_inc, inc, fetch\_add, add\}} + support the ``standard \ac{AMO} types'' listed in Table~\ref{stdamotypes}, +\item \FUNC{shmem\_atomic\_\{fetch, set, swap\}} support + the ``extended \ac{AMO} types'' listed in Table~\ref{extamotypes}, and +\item \FUNC{shmem\_atomic\_\{fetch\_and, and, fetch\_or, or, fetch\_xor, xor\}} + support the ``bitwise \ac{AMO} types'' listed in Table~\ref{bitamotypes}. \end{itemize} -Where appropriate compiler support is available, \openshmem{} provides type-generic -atomic memory operation interfaces via \Celev{} generic selection. 
The type-generic -\ac{AMO} routines each support the ``standard \ac{AMO} types’’ listed in Table \ref{stdamotypes}, -except for \FUNC{shmem\_fetch}, \FUNC{shmem\_set}, and \FUNC{shmem\_swap}, which supports the ``extended \ac{AMO} types’’ listed -in Table \ref{extamotypes}. +The standard, extended, and bitwise \ac{AMO} types include some of the exact-width +integer types defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.1.1 and +\Cstd[11]~\S7.20.1.1. When the \Cstd translation environment +does not provide exact-width integer types with \HEADER{stdint.h}, an +\openshmem implementation is not required to provide support for these types. \begin{table}[h] \begin{center} \begin{tabular}{|l|l|} \hline - \TYPE & \TYPENAME\\ - \hline - int & int\\ - \hline - long & long\\ - \hline - long long & longlong\\ - \hline + \TYPE & \TYPENAME \\ \hline + int & int \\ \hline + long & long \\ \hline + long long & longlong \\ \hline + unsigned int & uint \\ \hline + unsigned long & ulong \\ \hline + unsigned long long & ulonglong \\ \hline + int32\_t & int32 \\ \hline + int64\_t & int64 \\ \hline + uint32\_t & uint32 \\ \hline + uint64\_t & uint64 \\ \hline + size\_t & size \\ \hline + ptrdiff\_t & ptrdiff \\ \hline \end{tabular} - \caption{Standard \ac{AMO} Types and Names} + \TableCaptionRef{Standard \ac{AMO} Types and Names} \label{stdamotypes} - \end{center} + \end{center} \end{table} \begin{table}[h] \begin{center} \begin{tabular}{|l|l|} \hline - \TYPE & \TYPENAME\\ - \hline - float & float\\ - \hline - double & double\\ - \hline - int & int\\ - \hline - long & long\\ - \hline - long long & longlong\\ - \hline + \TYPE & \TYPENAME \\ \hline + float & float \\ \hline + double & double \\ \hline + int & int \\ \hline + long & long \\ \hline + long long & longlong \\ \hline + unsigned int & uint \\ \hline + unsigned long & ulong \\ \hline + unsigned long long & ulonglong \\ \hline + int32\_t & int32 \\ \hline + int64\_t & int64 \\ \hline + uint32\_t & uint32 \\ \hline + uint64\_t & 
uint64 \\ \hline + size\_t & size \\ \hline + ptrdiff\_t & ptrdiff \\ \hline \end{tabular} - \caption{Extended \ac{AMO} Types and Names} + \TableCaptionRef{Extended \ac{AMO} Types and Names} \label{extamotypes} - \end{center} + \end{center} +\end{table} + +\begin{table}[h] + \begin{center} + \begin{tabular}{|l|l|} + \hline + \TYPE & \TYPENAME \\ \hline + unsigned int & uint \\ \hline + unsigned long & ulong \\ \hline + unsigned long long & ulonglong \\ \hline + int32\_t & int32 \\ \hline + int64\_t & int64 \\ \hline + uint32\_t & uint32 \\ \hline + uint64\_t & uint64 \\ \hline + \end{tabular} + \TableCaptionRef{Bitwise \ac{AMO} Types and Names} + \label{bitamotypes} + \end{center} \end{table} diff --git a/content/backmatter.tex b/content/backmatter.tex index 780464afb..f2b4b7904 100644 --- a/content/backmatter.tex +++ b/content/backmatter.tex @@ -1,4 +1,4 @@ -\clearpage +\clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \appendix @@ -8,69 +8,79 @@ \fancyhf{} \fancyhead[RE, LO]{\leftmark} \fancyhead[RO, LE]{\thepage} -\fancyfoot[CE,CO]{\thepage} +\fancyfoot[CE, CO]{\thepage} \renewcommand{\headrulewidth}{0pt} -\chapter{Writing \openshmem Programs} -\section*{Incorporating \openshmem{} into Programs}\label{sec:writing_programs} +\chapter{Writing OpenSHMEM Programs} +\section*{Incorporating OpenSHMEM into Programs}\label{sec:writing_programs} -In this section, we describe how to write a ``Hello World" \openshmem program. -To write a ``Hello World" \openshmem program we need to: +The following section describes how to write a ``Hello World" \openshmem program. +To write a ``Hello World" \openshmem program, the user must: \begin{itemize} -\item Add the include file shmem.h (for \Clang) or shmem.fh (for \Fortran). -\item Add the initialization call \FUNC{shmem\_init}, (line 9). -\item Use OpenSHMEM calls to query the the total number of PEs (line 10) and PE - id (line 11). 
-\item There is no explicit finalize call; either a return from \texttt{main()} - (line 13) or an explicit \texttt{exit()} acts as an implicit \openshmem - finalization. -\item In \openshmem the order in which lines appear in the output is not fixed - as \ac{PE}s execute asynchronously in parallel. +\item Include the header file \HEADER{shmem.h} for \Cstd or \HEADER{shmem.fh} for \Fortran. +\item Add the initialization call \hyperref[subsec:shmem_init]{\FUNC{shmem\_init}}. +\item Use \openshmem calls to query the local \ac{PE} number + (\hyperref[subsec:shmem_my_pe]{\FUNC{shmem\_my\_pe}}) and the total number + of \acp{PE} (\hyperref[subsec:shmem_n_pes]{\FUNC{shmem\_n\_pes}}). +\item Add the finalization call \hyperref[subsec:shmem_finalize]{\FUNC{shmem\_finalize}}. \end{itemize} +In \openshmem, the order in which lines appear in the output is not +deterministic because \acp{PE} execute asynchronously in parallel. + \begin{minipage}{\linewidth} \vspace{0.1in} -\numberedlisting{label=openshmem-hello,language=OSH2+C} +\numberedlisting{caption={``Hello World'' example program in \Cstd},label=openshmem-hello,language=OSH2+C} {example_code/hello-openshmem.c} -\outputlisting{language=bash,caption={Expected Output (4 processors)}} +\outputlisting{language=bash,caption={Possible ordering of expected output with 4 \acp{PE} from the program in Listing~\ref{openshmem-hello}}} {example_code/hello-openshmem-c.output} \vspace{0.1in} \end{minipage} -\openshmem also has a \Fortran{} API, so for completeness we will now give the -same program written in \Fortran, in listing~\ref{openshmem-hello-f90}: +\clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +\begin{deprecate} +\openshmem also provides a \Fortran API. Listing~\ref{openshmem-hello-f90} shows a similar program written in \Fortran. 
\begin{minipage}{\linewidth} \vspace{0.1in} -\numberedlisting{label=openshmem-hello-f90,language=OSH2+F} +\numberedlisting{caption={``Hello World'' example program in \Fortran},label=openshmem-hello-f90,language=OSH2+F} {example_code/hello-openshmem.f90} -\outputlisting{language=bash,caption={Expected Output (4 processors)}} +\outputlisting{language=bash,caption={Possible ordering of expected output with 4 \acp{PE} from the program in Listing~\ref{openshmem-hello-f90}}} {example_code/hello-openshmem-f90.output} \vspace{0.1in} \end{minipage} +\end{deprecate} + +\clearpage %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -The following example shows a more complex \openshmem program that illustrates -the use of symmetric data objects. Note the declaration of the \VAR{static -short dest} array and its use as the remote destination in \openshmem short -\PUT. The use of the \VAR{static} keyword results in the \VAR{dest} array being -symmetric on \ac{PE} \CONST{0} and \ac{PE} \CONST{1}. Each \ac{PE} is able to -transfer data to the \dest{} array by simply specifying the local address of the -symmetric data object which is to receive the data. This aids programmability, -as the address of the \dest{} need not be exchanged with the active side -(\ac{PE} \CONST{0}) prior to the RMA (Remote Memory Access) routine. -Conversely, the declaration of the \VAR{short source} array is asymmetric. -Because the \PUT{} handles the references to the \VAR{source} array only on the -active (local) side, the asymmetric \source{} object is handled correctly. +The example in Listing~\ref{openshmem-hello-symmetric} shows a more complex +\openshmem program that illustrates the use of symmetric data objects. +Note the declaration of the \VAR{static short dest} array and its use as the +remote destination in \hyperref[subsec:shmem_put]{\FUNC{shmem\_put}}. + +The \VAR{static} keyword makes the \VAR{dest} array symmetric on all \acp{PE}. 
+Each \ac{PE} is able to transfer data to a remote \dest{} array by simply +specifying to an OpenSHMEM routine such as \hyperref[subsec:shmem_put]{\FUNC{shmem\_put}} +the local address of the symmetric data object that will receive the data. +This local address resolution aids programmability because the address of the +\dest{} need not be exchanged with the active side (\ac{PE} \CONST{0}) prior to +the \acf{RMA} routine. + +Conversely, the declaration of the \VAR{short source} array is asymmetric +(local only). +The \source{} object does not need to be symmetric because \PUT{} handles the +references to the \VAR{source} array only on the active (local) side. \begin{minipage}{\linewidth} \vspace{0.1in} -\numberedlisting{label=openshmem-hello,language=OSH2+C} +\numberedlisting{caption={Example program with symmetric data objects},label=openshmem-hello-symmetric,language=OSH2+C} {example_code/writing_shmem_example.c} -\outputlisting{language=bash,caption={Expected Output (4 processors)}} +\outputlisting{language=bash,caption={Possible ordering of expected output with 4 \acp{PE} from the program in Listing~\ref{openshmem-hello-symmetric}}} {example_code/writing_shmem_example.output} \vspace{0.1in} \end{minipage} @@ -79,64 +89,66 @@ \section*{Incorporating \openshmem{} into Programs}\label{sec:writing_programs} \chapter{Compiling and Running Programs}\label{sec:compiling} -As of this writing, the \openshmem{} specification is silent regarding how -\openshmem{} programs are compiled, linked and run. This section shows some -examples of how wrapper programs are utilized in the \openshmem{} Reference +The \openshmem Specification does not specify how +\openshmem programs are compiled, linked, and run. This section shows some +examples of how wrapper programs are utilized in the \openshmem Reference Implementation to compile and launch programs. 
\section{Compilation} -\subsection*{Programs written in \Clang} +\subsection*{Programs written in \Cstd} -The \openshmem{} Reference Implementation provides a wrapper program named -\textbf{oshcc}, to aid in the compilation of \Clang{} programs, the wrapper -could be called as follows: +The \openshmem Reference Implementation provides a wrapper program, named +\textbf{oshcc}, to aid in the compilation of \Cstd programs. +The wrapper may be called as follows: \begin{lstlisting}[language=bash] oshcc <compiler options> -o myprogram myprogram.c \end{lstlisting} Where the $\langle\mbox{compiler options}\rangle$ are options understood by the -underlying \Clang{} compiler. +underlying \Cstd compiler called by \textbf{oshcc}. \subsection*{Programs written in \Cpp} -The \openshmem{} Reference Implementation provides a wrapper program named -\textbf{oshCC}, to aid in the compilation of \Cpp{} programs, the wrapper could -be called as follows: +The \openshmem Reference Implementation provides a wrapper program, named +\textbf{oshc++}, to aid in the compilation of \Cpp programs. +The wrapper may be called as follows: \begin{lstlisting}[language=bash] -oshCC <compiler options> -o myprogram myprogram.cpp +oshc++ <compiler options> -o myprogram myprogram.cpp \end{lstlisting} Where the $\langle\mbox{compiler options}\rangle$ are options understood by the -underlying \Cpp{} compiler called by \textbf{oshCC}. +underlying \Cpp compiler called by \textbf{oshc++}. \subsection*{Programs written in \Fortran} -The \openshmem{} Reference Implementation provides a wrapper program named -\textbf{oshfort}, to aid in the compilation of \Fortran{} programs, the wrapper -could be called as follows: +\begin{deprecate} +The \openshmem Reference Implementation provides a wrapper program, named +\textbf{oshfort}, to aid in the compilation of \Fortran programs.
+The wrapper may be called as follows: \begin{lstlisting}[language=bash] oshfort <compiler options> -o myprogram myprogram.f \end{lstlisting} Where the $\langle\mbox{compiler options}\rangle$ are options understood by the -underlying \Fortran{} compiler called by \textbf{oshfort}. +underlying \Fortran compiler called by \textbf{oshfort}. +\end{deprecate} \section{Running Programs} -The \openshmem{} Reference Implementation provides a wrapper program named -\textbf{oshrun}, to launch \openshmem programs, the wrapper could be called as -follows: +The \openshmem Reference Implementation provides a wrapper program, named +\textbf{oshrun}, to launch \openshmem programs. +The wrapper may be called as follows: \begin{lstlisting}[language=bash] -oshrun <additional options> -np <#> <program> <program arguments> +oshrun <runner options> -np <#> <program> <program arguments> \end{lstlisting} -The program arguments for \textbf{oshrun} are: +The arguments for \textbf{oshrun} are: \begin{tabular}{p{0.3\textwidth}p{0.6\textwidth}} -$\langle\mbox{additional options}\rangle$ & {Options passed to the underlying launcher.}\tabularnewline --np $\langle\mbox{\#}\rangle$ & {The number of \ac{PE}s to be used in the execution.}\tabularnewline +$\langle\mbox{runner options}\rangle$ & {Options passed to the underlying launcher.}\tabularnewline +-np $\langle\mbox{\#}\rangle$ & {The number of \acp{PE} to be used in the execution.}\tabularnewline $\langle\mbox{program}\rangle$ & {The program executable to be launched.}\tabularnewline $\langle\mbox{program arguments}\rangle$ & {Flags and other parameters to pass to the program.}\tabularnewline \end{tabular} @@ -144,66 +156,61 @@ \section{Running Programs} +\chapter{Undefined Behavior in OpenSHMEM}\label{sec:undefined} -\chapter{Undefined Behavior in \openshmem}\label{sec:undefined} - -The specification provides guidelines to the expected behavior of -various library routines. In cases where routines are improperly used -or the input is not in accordance with the specification, undefined -behavior may be observed.
Depending on the implementation there are -many interpretations of undefined behavior. +The \openshmem Specification formalizes the expected behavior of +its library routines. In cases where routines are improperly used +or the input is not in accordance with the Specification, the behavior +is undefined. -$\;$ - -$ $% -\begin{tabular}{|>{\raggedright}p{0.3\textwidth}|>{\raggedright}p{0.6\textwidth}|} +\begin{longtable}{|>{\raggedright}p{0.3\textwidth}|>{\raggedright}p{0.6\textwidth}|} \hline \textbf{Inappropriate Usage} & \textbf{Undefined Behavior}\tabularnewline \hline -\hline -Uninitialized library & If \openshmem is not initialized through a call to -\FUNC{shmem\_init}, subsequent accesses to \openshmem routines have undefined -results. An implementation may choose, for example, to try to continue or abort -immediately upon the first call to an uninitialized routine.\tabularnewline -\hline -Accessing non-existent \ac{PE}s & If a communications routine accesses a -non-existent \ac{PE}, then the \openshmem library can choose to handle this -situation in an implementation-defined way. For example, the library may issue +\endhead +Uninitialized library & If the \openshmem library is not initialized, +calls to non-initializing \openshmem routines have undefined +behavior. For example, an implementation may try to continue or may abort +immediately upon an \openshmem call into the uninitialized library. +\tabularnewline +\hline +Multiple calls to initialization routines & In an \openshmem program where +the initialization routines \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} +have already been called, any subsequent calls to these initialization routines +result in undefined behavior. +\tabularnewline +\hline +Accessing non-existent \acp{PE} & If a communications routine accesses a +non-existent \ac{PE}, then the \openshmem library may handle this +situation in an implementation-defined way. 
For example, the library may report an error message saying that the \ac{PE} accessed is outside the range of -accessible \ac{PE}s, or may exit without a warning.\tabularnewline +accessible \acp{PE}, or may exit without a warning.\tabularnewline \hline Use of non-symmetric variables & Some routines require remotely accessible -variables to perform their function. A \PUT{} to a non-symmetric variable can -be trapped where possible and the library can abort the program. Another -implementation may choose to continue either with a warning or -silently.\tabularnewline +variables to perform their function. For example, a \PUT{} to a non-symmetric variable may +be trapped where possible and the library may abort the program. Another +implementation may choose to continue execution with or without a warning. +\tabularnewline \hline -Non-symmetric variables & The symmetric memory management routines are -collectives, which means that all \ac{PE}s in the program must issue the same -\FUNC{shmem\_malloc} call with the same size request. Program behavior after a +Non-symmetric allocation of symmetric memory & The symmetric memory management routines are +collectives. For example, all \acp{PE} in the program must call +\FUNC{shmem\_malloc} with the same \VAR{size} argument. Program behavior after a mismatched \FUNC{shmem\_malloc} call is undefined.\tabularnewline \hline -Use of NULL pointers with non-zero \VAR{len} specified & In any \openshmem routine +Use of null pointers with non-zero \VAR{len} specified & In any \openshmem routine that takes a pointer and \VAR{len} describing the number of elements in that -pointer, NULL may not be specified for the pointer unless the corresponding \VAR{len} is also +pointer, a null pointer may not be given unless the corresponding \VAR{len} is also specified as zero. Otherwise, the resulting behavior is undefined. The following cases summarize this behavior: \begin{itemize} - \item \VAR{len} is 0, pointer is NULL: supported. 
- \item \VAR{len} is not 0, pointer is NULL: undefined behavior. - \item \VAR{len} is 0, pointer is not NULL: supported. - \item \VAR{len} is not 0, pointer is not NULL: supported. + \item \VAR{len} is 0, pointer is null: supported. + \item \VAR{len} is not 0, pointer is null: undefined behavior. + \item \VAR{len} is 0, pointer is non-null: supported. + \item \VAR{len} is not 0, pointer is non-null: supported. \end{itemize} \tabularnewline \hline -Multiple calls to \FUNC{shmem\_init} & In an OpenSHMEM program where -\FUNC{shmem\_init} has already be called, any subsequent calls to -\FUNC{shmem\_init} result in undefined behavior.\tabularnewline -\hline -\end{tabular} - - - +\end{longtable} @@ -214,99 +221,79 @@ \section{\ac{MPI} Interoperability} \begin{sloppypar} % to prevent constants from running into margins. % -\openshmem routines can be used in conjunction with \ac{MPI} routines in the -same program. For example, on SGI systems, programs that use both \ac{MPI} and +\openshmem routines may be used in conjunction with \ac{MPI} routines in the +same program. For example, on \ac{SGI} systems, programs that use both \ac{MPI} and \openshmem routines call \FUNC{MPI\_Init} and \FUNC{MPI\_Finalize} but omit the call to the \FUNC{shmem\_init} routine. \openshmem \ac{PE} numbers are equal to the \ac{MPI} rank within the \CONST{MPI\_COMM\_WORLD} environment variable. -Note that this precludes use of \openshmem routines between processes in -different \CONST{MPI\_COMM\_WORLD}s. \ac{MPI} processes started using the -\FUNC{MPI\_Comm\_spawn} routine, for example, cannot use \openshmem routines to +Note that this indexing precludes use of \openshmem routines between processes in +different \CONST{MPI\_COMM\_WORLD}s. For example, \ac{MPI} processes started using the +\FUNC{MPI\_Comm\_spawn} routine cannot use \openshmem routines to communicate with their parent \ac{MPI} processes. 
% \end{sloppypar} % -On SGI systems where \ac{MPI} jobs use TCP/sockets for inter-host communication, -\openshmem routines can be used to communicate with processes running on the -same host. The \FUNC{shmem\_pe\_accessible} routine can be used to determine if +On \ac{SGI} systems where \ac{MPI} jobs use \ac{TCP}/sockets for inter-host communication, +\openshmem routines may be used to communicate with processes running on the +same host. The \FUNC{shmem\_pe\_accessible} routine should be used to determine if a remote \ac{PE} is accessible via \openshmem communication from the local \ac{PE}. When running an \ac{MPI} program involving multiple executable files, -\openshmem routines can be used to communicate with processes running from the +\openshmem routines may be used to communicate with processes running from the same or different executable files, provided that the communication is limited -to symmetric data objects. On these systems, static memory such as a -\Fortran{} common block or \Clang{} global variable, is symmetric between +to symmetric data objects. On these systems, static memory---such as a +\Fortran common block or \Cstd global variable---is symmetric between processes running from the same executable file, but is not symmetric between processes running from different executable files. Data allocated from the -symmetric heap (\FUNC{shmem\_malloc} or \FUNC{shpalloc}) is symmetric across the -same or different executable files. The routine \FUNC{shmem\_addr\_accessible} -can be used to determine if a local address is accessible via \openshmem +symmetric heap (e.g., \FUNC{shmem\_malloc}, \FUNC{shpalloc}) is symmetric across the +same or different executable files. The \FUNC{shmem\_addr\_accessible} routine +should be used to determine if a local address is accessible via \openshmem communication from a remote \ac{PE}. 
Another important feature of these systems is that the \FUNC{shmem\_pe\_accessible} routine returns \CONST{TRUE} only if the remote -\ac{PE} is a process running from the same executable file as the local PE, +\ac{PE} is a process running from the same executable file as the local \ac{PE}, indicating that full \openshmem support (static memory and symmetric heap) is available. When using \openshmem routines within an \ac{MPI} program, the use -of \ac{MPI} memory placement environment variables is required when using -non-default memory placement options. - -\clearpage +of \ac{MPI} memory-placement environment variables is required when using +non-default memory-placement options. +\chapter{History of OpenSHMEM}\label{sec:openshmem_history} - - -\chapter{History of \openshmem}\label{sec:openshmem_history} - -SHMEM has a long history as a parallel programming model, having been used -extensively on a number of products since 1993, including Cray T3D, Cray X1E, -the Cray XT3/4, SGI Origin, SGI Altix, clusters based on the Quadrics -interconnect, and to a very limited extent, Infiniband based clusters. +SHMEM has a long history as a parallel-programming model and has been +extensively used on a number of products since 1993, including the Cray T3D, +Cray X1E, Cray XT3 and XT4, \ac{SGI} Origin, \ac{SGI} Altix, Quadrics-based +clusters, and InfiniBand-based clusters. \begin{itemize} -\item A SHMEM Timeline +\item SHMEM Timeline \begin{itemize} \item Cray SHMEM \begin{itemize} - \item SHMEM first introduced by Cray Research Inc. 
in 1993 for Cray T3D - \item Cray is acquired by SGI in 1996 - \item Cray is acquired by Tera in 2000 (MTA) + \item SHMEM first introduced by Cray Research, Inc.\ in 1993 for Cray T3D + \item Cray was acquired by \ac{SGI} in 1996 + \item Cray was acquired by Tera in 2000 (MTA) \item Platforms: Cray T3D, T3E, C90, J90, SV1, SV2, X1, X2, XE, XMT, XT \end{itemize} - \item SGI SHMEM + \item \ac{SGI} SHMEM \begin{itemize} - \item SGI purchases Cray Research Inc. and SHMEM was integrated into - SGI's Message Passing Toolkit (MPT) - \item SGI currently owns the rights to SHMEM and \openshmem + \item \ac{SGI} acquired Cray Research, Inc.\ and SHMEM was integrated into + \ac{SGI}'s Message Passing Toolkit (MPT) + \item \ac{SGI} currently owns the rights to SHMEM and \openshmem \item Platforms: Origin, Altix 4700, Altix XE, ICE, UV - \item SGI was purchased by Rackable Systems in 2009 - \item SGI and Open Source Software Solutions, Inc. (OSSS) signed a - SHMEM trademark licensing agreement, in 2010 - \end{itemize} - \item Other Implementations - \begin{itemize} - \item Quadrics (Vega UK, Ltd.) - \item Hewlett Packard - \item GPSHMEM - \item IBM - \item QLogic - \item Mellanox - % \item University of Houston - \item University of Florida + \item \ac{SGI} was acquired by Rackable Systems in 2009 + \item \ac{SGI} and \ac{OSSS} signed a + SHMEM trademark licensing agreement in 2010 + \item \ac{HPE} acquired \ac{SGI} in 2016 \end{itemize} \end{itemize} -\item OpenSHMEM Implementations - \begin{itemize} - \item SGI \openshmem - \item University of Houston - \openshmem Reference Implementation - \item Mellanox ScalableSHMEM - \item Portals-SHMEM - \item IBM OpenSHMEM - \end{itemize} \end{itemize} +A listing of \openshmem implementations can be found on +\url{http://www.openshmem.org/}.
+ @@ -314,36 +301,247 @@ \chapter{History of \openshmem}\label{sec:openshmem_history} -\chapter{\openshmem Specification and Deprecated API}\label{sec:dep_api} +\chapter{OpenSHMEM Specification and Deprecated API}\label{sec:dep_api} -For the \openshmem Specification(s), deprecation is the process of identifying -API that is supported but no longer recommended for use by program users. For -\openshmem library users, said API \textbf{must} be supported until clearly -indicated as otherwise by the Specification. In this chapter we will record the -API that has been deprecated, the \openshmem Specification that effected the -deprecation, and if the feature is supported in the current version of the -specification. +\section{Overview}\label{subsec:dep_overview} +\TableIndex{Deprecated API} +For the \openshmem Specification, deprecation is the process of identifying +API that is supported but no longer recommended for use by users. +The deprecated API \textbf{must} be supported until clearly +indicated as otherwise by the Specification. +This chapter records the API or functionality that have been deprecated, the +version of the \openshmem Specification that effected the deprecation, and the +most recent version of the \openshmem Specification in which the feature was +supported before removal. \begin{center} -\begin{tabular}{|l|c|c|c|} +\scriptsize +\begin{longtable}{|l|c|c|l|} \hline - \textbf{Deprecated API} & \textbf{Deprecated Since} - & \textbf{Currently Supported(?)} & \textbf{Replaced By}\\ - \hline %There may be better table headings... 
- \FUNC{\_my\_pe} & 1.2 & Yes & \FUNC{shmem\_my\_pe} \\ \hline - \FUNC{\_num\_pes} & 1.2 & Yes & \FUNC{shmem\_n\_pes} \\ \hline - \FUNC{shmalloc} & 1.2 & Yes & \FUNC{shmem\_malloc} \\ \hline - \FUNC{shfree} & 1.2 & Yes & \FUNC{shmem\_free} \\ \hline - \FUNC{shrealloc} & 1.2 & Yes & \FUNC{shmem\_realloc} \\ \hline - \FUNC{shmemalign} & 1.2 & Yes & \FUNC{shmem\_align} \\ \hline - \FUNC{start\_pes} & 1.2 & Yes & \FUNC{shmem\_init} \\ \hline - \FUNC{SHMEM\_PUT} & 1.2 & Yes & \FUNC{SHMEM\_PUT8} or \FUNC{SHMEM\_PUT64} \\ \hline - \FUNC{SHMEM\_CACHE} & 1.3 & Yes & (none) \\ \hline - \_SHMEM\_* constants & 1.3 & Yes & (none) \\ \hline + \textbf{Deprecated API} + & \textbf{Deprecated Since} + & \textbf{Last Version Supported} + & \textbf{Replaced By} \\ \hline - \end{tabular} + \endhead + Header Directory: \hyperref[subsec:dep_rationale:mpp]{\HEADER{mpp}} & 1.1 & Current & (none) \\ \hline + \CorCpp: \hyperref[subsec:start_pes]{\FuncRef{start\_pes}} & 1.2 & Current & \hyperref[subsec:shmem_init]{\FUNC{shmem\_init}} \\ \hline + \Fortran: \hyperref[subsec:start_pes]{\FuncRef{START\_PES}} & 1.2 & Current & \hyperref[subsec:shmem_init]{\FUNC{SHMEM\_INIT}} \\ \hline + \hyperref[subsec:start_pes]{Implicit finalization} & 1.2 & Current & \hyperref[subsec:shmem_finalize]{\FUNC{shmem\_finalize}} \\ \hline + \CorCpp: \FuncRef{\_my\_pe} & 1.2 & Current & \hyperref[subsec:shmem_my_pe]{\FUNC{shmem\_my\_pe}} \\ \hline + \CorCpp: \FuncRef{\_num\_pes} & 1.2 & Current & \hyperref[subsec:shmem_n_pes]{\FUNC{shmem\_n\_pes}} \\ \hline + \Fortran: \FuncRef{MY\_PE} & 1.2 & Current & \hyperref[subsec:shmem_my_pe]{\FUNC{SHMEM\_MY\_PE}} \\ \hline + \Fortran: \FuncRef{NUM\_PES} & 1.2 & Current & \hyperref[subsec:shmem_n_pes]{\FUNC{SHMEM\_N\_PES}} \\ \hline + \CorCpp: \FuncRef{shmalloc} & 1.2 & Current & \hyperref[subsec:shfree]{\FUNC{shmem\_malloc}} \\ \hline + \CorCpp: \FuncRef{shfree} & 1.2 & Current & \hyperref[subsec:shfree]{\FUNC{shmem\_free}} \\ \hline + \CorCpp: \FuncRef{shrealloc} & 1.2 & 
Current & \hyperref[subsec:shfree]{\FUNC{shmem\_realloc}} \\ \hline + \CorCpp: \FuncRef{shmemalign} & 1.2 & Current & \hyperref[subsec:shfree]{\FUNC{shmem\_align}} \\ \hline + \Fortran: \FuncRef{SHMEM\_PUT} & 1.2 & Current & \hyperref[subsec:shmem_put]{\FUNC{SHMEM\_PUT8} or \FUNC{SHMEM\_PUT64}} \\ \hline + \minitab{\CorCpp: \hyperref[subsec:shmem_cache]{\FuncRef{shmem\_clear\_cache\_inv}} + \\ \Fortran: \hyperref[subsec:shmem_cache]{\FuncRef{SHMEM\_CLEAR\_CACHE\_INV}}} + & 1.3 & Current & (none) \\ \hline + \CorCpp: \hyperref[subsec:shmem_cache]{\FuncRef{shmem\_clear\_cache\_line\_inv}} & 1.3 & Current & (none) \\ \hline + \minitab{\CorCpp: \hyperref[subsec:shmem_cache]{\FuncRef{shmem\_set\_cache\_inv}} + \\ \Fortran: \hyperref[subsec:shmem_cache]{\FuncRef{SHMEM\_SET\_CACHE\_INV}}} + & 1.3 & Current & (none) \\ \hline + \minitab{\CorCpp: \hyperref[subsec:shmem_cache]{\FuncRef{shmem\_set\_cache\_line\_inv}} + \\ \Fortran: \hyperref[subsec:shmem_cache]{\FuncRef{SHMEM\_SET\_CACHE\_LINE\_INV}}} + & 1.3 & Current & (none) \\ \hline + \minitab{\CorCpp: \hyperref[subsec:shmem_cache]{\FuncRef{shmem\_udcflush}} + \\ \Fortran: \hyperref[subsec:shmem_cache]{\FuncRef{SHMEM\_UDCFLUSH}}} + & 1.3 & Current & (none) \\ \hline + \minitab{\CorCpp: \hyperref[subsec:shmem_cache]{\FuncRef{shmem\_udcflush\_line}} + \\ \Fortran: \hyperref[subsec:shmem_cache]{\FuncRef{SHMEM\_UDCFLUSH\_LINE}}} + & 1.3 & Current & (none) \\ \hline + \LibConstRef{\_SHMEM\_SYNC\_VALUE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_SYNC\_VALUE}} \\ \hline + \LibConstRef{\_SHMEM\_BARRIER\_SYNC\_SIZE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_BARRIER\_SYNC\_SIZE}} \\ \hline + \LibConstRef{\_SHMEM\_BCAST\_SYNC\_SIZE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_BCAST\_SYNC\_SIZE}} \\ \hline + \LibConstRef{\_SHMEM\_COLLECT\_SYNC\_SIZE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_COLLECT\_SYNC\_SIZE}} \\ \hline + 
\LibConstRef{\_SHMEM\_REDUCE\_SYNC\_SIZE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_REDUCE\_SYNC\_SIZE}} \\ \hline + \LibConstRef{\_SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}} \\ \hline + \LibConstRef{\_SHMEM\_MAJOR\_VERSION} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_MAJOR\_VERSION}} \\ \hline + \LibConstRef{\_SHMEM\_MINOR\_VERSION} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_MINOR\_VERSION}} \\ \hline + \LibConstRef{\_SHMEM\_MAX\_NAME\_LEN} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_MAX\_NAME\_LEN}} \\ \hline + \LibConstRef{\_SHMEM\_VENDOR\_STRING} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_VENDOR\_STRING}} \\ \hline + \LibConstRef{\_SHMEM\_CMP\_EQ} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_CMP\_EQ}} \\ \hline + \LibConstRef{\_SHMEM\_CMP\_NE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_CMP\_NE}} \\ \hline + \LibConstRef{\_SHMEM\_CMP\_LT} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_CMP\_LT}} \\ \hline + \LibConstRef{\_SHMEM\_CMP\_LE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_CMP\_LE}} \\ \hline + \LibConstRef{\_SHMEM\_CMP\_GT} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_CMP\_GT}} \\ \hline + \LibConstRef{\_SHMEM\_CMP\_GE} & 1.3 & Current & \hyperref[subsec:library_constants]{\CONST{SHMEM\_CMP\_GE}} \\ \hline + \EnvVarRef{SMA\_VERSION} & 1.4 & Current & \hyperref[subsec:environment_variables]{\VAR{SHMEM\_VERSION}} \\ \hline + \EnvVarRef{SMA\_INFO} & 1.4 & Current & \hyperref[subsec:environment_variables]{\VAR{SHMEM\_INFO}} \\ \hline + \EnvVarRef{SMA\_SYMMETRIC\_SIZE} & 1.4 & Current & \hyperref[subsec:environment_variables]{\VAR{SHMEM\_SYMMETRIC\_SIZE}} \\ \hline + \EnvVarRef{SMA\_DEBUG} & 1.4 & Current & 
\hyperref[subsec:environment_variables]{\VAR{SHMEM\_DEBUG}} \\ \hline + \minitab{\CorCpp: \FuncRef{shmem\_wait} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_wait}} + & 1.4 & Current & See \textbf{Notes} for \hyperref[subsec:shmem_wait_until]{\FUNC{shmem\_wait\_until}} \\ \hline + \CorCpp: \FuncRef{shmem\_wait\_until} & 1.4 & Current + & \Cstd[11]: \hyperref[subsec:shmem_wait_until]{\FUNC{shmem\_wait\_until}}, \CorCpp: \hyperref[subsec:shmem_wait_until]{\FUNC{shmem\_long\_wait\_until}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_fetch} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_fetch}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_fetch]{\FUNC{shmem\_atomic\_fetch}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_set} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_set}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_set]{\FUNC{shmem\_atomic\_set}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_cswap} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_cswap}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_compare_swap]{\FUNC{shmem\_atomic\_compare\_swap}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_swap} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_swap}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_swap]{\FUNC{shmem\_atomic\_swap}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_finc} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_finc}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_fetch_inc]{\FUNC{shmem\_atomic\_fetch\_inc}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_inc} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_inc}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_inc]{\FUNC{shmem\_atomic\_inc}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_fadd} + \\ \CorCpp: \FuncRef{shmem\_\FuncParam{TYPENAME}\_fadd}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_fetch_add]{\FUNC{shmem\_atomic\_fetch\_add}} \\ \hline + \minitab{\Cstd[11]: \FuncRef{shmem\_add} + \\ \CorCpp: 
\FuncRef{shmem\_\FuncParam{TYPENAME}\_add}} + & 1.4 & Current & \hyperref[subsec:shmem_atomic_add]{\FUNC{shmem\_atomic\_add}} \\ \hline + Entire \Fortran API & 1.4 & Current & (none) \\ \hline + \end{longtable} +\end{center} + +\section{Deprecation Rationale}\label{subsec:dep_rationale} + +\subsection{Header Directory: \HEADER{mpp}} +\label{subsec:dep_rationale:mpp} +In addition to the default system header paths, \openshmem implementations +must provide all \openshmem-specified header files from the \HEADER{mpp} +header directory such that these headers can be referenced in \CorCpp as +\begin{lstlisting}[language=] +#include <mpp/shmem.h> +#include <mpp/shmemx.h> +\end{lstlisting} +and in \Fortran as +\begin{lstlisting}[language=] +include 'mpp/shmem.fh' +include 'mpp/shmemx.fh' +\end{lstlisting} +for backwards compatibility with \ac{SGI} SHMEM. + +\subsection{\CorCpp: \FUNC{start\_pes}} +The \CorCpp routine \FUNC{start\_pes} includes an unnecessary initialization +argument that is a remnant of historical \emph{SHMEM} implementations and no +longer reflects the requirements of modern \openshmem implementations. +Furthermore, the naming of \FUNC{start\_pes} does not include the standardized +\shmemprefixLC{} naming prefix. This routine has been deprecated and +\openshmem users are encouraged to use \FUNC{shmem\_init} instead. + +\subsection{Implicit Finalization} +Implicit finalization was deprecated and replaced with explicit finalization using the +\FUNC{shmem\_finalize} routine. Explicit finalization improves portability and +also improves interoperability with profiling and debugging tools. + +\subsection{\CorCpp: \FUNC{\_my\_pe}, \FUNC{\_num\_pes}, \FUNC{shmalloc}, + \FUNC{shfree}, \FUNC{shrealloc}, \FUNC{shmemalign}} +The \CorCpp routines \FUNC{\_my\_pe}, \FUNC{\_num\_pes}, \FUNC{shmalloc}, +\FUNC{shfree}, \FUNC{shrealloc}, and \FUNC{shmemalign} were deprecated in order +to normalize the \openshmem \ac{API} to use \shmemprefixLC{} as the standard +prefix for all routines.
+ +\subsection{\textit{Fortran}: \FUNC{START\_PES}, \FUNC{MY\_PE}, \FUNC{NUM\_PES}} %% WARNING: Issue #66. +The \Fortran routines \FUNC{START\_PES}, \FUNC{MY\_PE}, and \FUNC{NUM\_PES} +were deprecated in order to minimize the API differences from the deprecation +of \CorCpp routines \FUNC{start\_pes}, \FUNC{\_my\_pe}, and \FUNC{\_num\_pes}. + +\subsection{\textit{Fortran}: \FUNC{SHMEM\_PUT}} %% WARNING: Issue #66. +The \Fortran routine \FUNC{SHMEM\_PUT} is defined only for the \Fortran +\ac{API} and is semantically identical to \Fortran routines +\FUNC{SHMEM\_PUT8} and \FUNC{SHMEM\_PUT64}. Since \FUNC{SHMEM\_PUT8} and +\FUNC{SHMEM\_PUT64} have defined equivalents in the \CorCpp interface, +\FUNC{SHMEM\_PUT} is ambiguous and has been deprecated. + +\subsection{SHMEM\_CACHE} +The \FUNC{SHMEM\_CACHE} \ac{API} +\begin{center} +\begin{tabular}{ll} + \CorCpp: & \Fortran: \\ + \FUNC{shmem\_clear\_cache\_inv} & \FUNC{SHMEM\_CLEAR\_CACHE\_INV} \\ + \FUNC{shmem\_set\_cache\_inv} & \FUNC{SHMEM\_SET\_CACHE\_INV} \\ + \FUNC{shmem\_set\_cache\_line\_inv} & \FUNC{SHMEM\_SET\_CACHE\_LINE\_INV} \\ + \FUNC{shmem\_udcflush} & \FUNC{SHMEM\_UDCFLUSH} \\ + \FUNC{shmem\_udcflush\_line} & \FUNC{SHMEM\_UDCFLUSH\_LINE} \\ + \FUNC{shmem\_clear\_cache\_line\_inv} \\ +\end{tabular} +\end{center} +was originally implemented for systems with cache-management instructions. +This API has largely gone unused on cache-coherent system architectures. +\FUNC{SHMEM\_CACHE} has been deprecated. 
+ +\subsection{\CONST{\_SHMEM\_*} Library Constants} +The library constants +\begin{center} +\begin{tabular}{ll} + \CONST{\_SHMEM\_SYNC\_VALUE} & \CONST{\_SHMEM\_MAX\_NAME\_LEN} \\ + \CONST{\_SHMEM\_BARRIER\_SYNC\_SIZE} & \CONST{\_SHMEM\_VENDOR\_STRING} \\ + \CONST{\_SHMEM\_BCAST\_SYNC\_SIZE} & \CONST{\_SHMEM\_CMP\_EQ} \\ + \CONST{\_SHMEM\_COLLECT\_SYNC\_SIZE} & \CONST{\_SHMEM\_CMP\_NE} \\ + \CONST{\_SHMEM\_REDUCE\_SYNC\_SIZE} & \CONST{\_SHMEM\_CMP\_LT} \\ + \CONST{\_SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE} & \CONST{\_SHMEM\_CMP\_LE} \\ + \CONST{\_SHMEM\_MAJOR\_VERSION} & \CONST{\_SHMEM\_CMP\_GT} \\ + \CONST{\_SHMEM\_MINOR\_VERSION} & \CONST{\_SHMEM\_CMP\_GE} \\ +\end{tabular} +\end{center} +do not adhere to the \Cstd standard's reserved identifiers and the \Cpp +standard's reserved names. These constants were deprecated and replaced +with corresponding constants of prefix \shmemprefix{} that adhere to \CorCpp{} +and \Fortran naming conventions. + +\subsection{\VAR{SMA\_*} Environment Variables}\label{subsec:deprecate-sma-env} +The environment variables \VAR{SMA\_VERSION}, \VAR{SMA\_INFO}, +\VAR{SMA\_SYMMETRIC\_SIZE}, and \VAR{SMA\_DEBUG} +were deprecated in order to normalize the \openshmem \ac{API} to use +\shmemprefix{} as the standard prefix for all environment variables. + +\subsection{\CorCpp: \FUNC{shmem\_wait}} +The \CorCpp interface for \FUNC{shmem\_wait} and \FUNC{shmem\_\FuncParam{TYPENAME}\_wait} +was identified as unintuitive with respect to +the comparison operation it performed. As \FUNC{shmem\_wait} can be trivially +replaced by \FUNC{shmem\_wait\_until} where \VAR{cmp} is +\CONST{SHMEM\_CMP\_NE}, the \FUNC{shmem\_wait} interface was deprecated in +favor of \FUNC{shmem\_wait\_until}, which makes the comparison operation +explicit and better communicates the developer's intent. 
+ +\subsection{\CorCpp: \FUNC{shmem\_wait\_until}} +The \CTYPE{long}-typed \CorCpp routine \FUNC{shmem\_wait\_until} was deprecated +in favor of the \Cstd[11] type-generic interface of the same name or the +explicitly typed \CorCpp routine \FUNC{shmem\_long\_wait\_until}. + +\subsection{\textit{C11} and \CorCpp: \FUNC{shmem\_fetch}, \FUNC{shmem\_set}, %% Issue #66. + \FUNC{shmem\_cswap}, \FUNC{shmem\_swap}, \FUNC{shmem\_finc}, + \FUNC{shmem\_inc}, \FUNC{shmem\_fadd}, \FUNC{shmem\_add}} +The \Cstd[11] and \CorCpp interfaces for +\begin{center} +\begin{tabular}{ll} + \Cstd[11]: & \CorCpp: \\ + \FUNC{shmem\_fetch} & \FUNC{shmem\_\FuncParam{TYPENAME}\_fetch} \\ + \FUNC{shmem\_set} & \FUNC{shmem\_\FuncParam{TYPENAME}\_set} \\ + \FUNC{shmem\_cswap} & \FUNC{shmem\_\FuncParam{TYPENAME}\_cswap} \\ + \FUNC{shmem\_swap} & \FUNC{shmem\_\FuncParam{TYPENAME}\_swap} \\ + \FUNC{shmem\_finc} & \FUNC{shmem\_\FuncParam{TYPENAME}\_finc} \\ + \FUNC{shmem\_inc} & \FUNC{shmem\_\FuncParam{TYPENAME}\_inc} \\ + \FUNC{shmem\_fadd} & \FUNC{shmem\_\FuncParam{TYPENAME}\_fadd} \\ + \FUNC{shmem\_add} & \FUNC{shmem\_\FuncParam{TYPENAME}\_add} \\ +\end{tabular} \end{center} +were deprecated and replaced with +similarly named interfaces within the \FUNC{shmem\_atomic\_*} namespace +in order to more clearly identify these calls as performing atomic operations. +In addition, the abbreviated names ``cswap'', ``finc'', and ``fadd'' were +expanded for clarity to ``compare\_swap'', ``fetch\_inc'', and ``fetch\_add''. +\subsection{\textit{Fortran} API}\label{subsec:deprecate-fortran} %% WARNING: Issue #66. +The entire \openshmem \Fortran API was deprecated because of a general lack of +use and a lack of conformance with legacy \Fortran standards. 
In lieu of an +extensive update of the \Fortran API, \Fortran users are encouraged to +leverage the \openshmem Specification's \Cstd API through the +\Fortran--\Cstd interoperability initially standardized by \Fortran[2003]% +\footnote{Formally, \Fortran[2003] is known as ISO/IEC~1539-1:2004(E).}. @@ -351,20 +549,153 @@ \chapter{\openshmem Specification and Deprecated API}\label{sec:dep_api} \chapter{Changes to this Document}\label{sec:changelog} +\section{Version 1.4} +Major changes in \openshmem[1.4] include +multithreading support, +\emph{contexts} for communication management, +\FUNC{shmem\_sync}, +\FUNC{shmem\_calloc}, +expanded type support, +a new namespace for atomic operations, +atomic bitwise operations, +\FUNC{shmem\_test} for nonblocking point-to-point synchronization, +and \Cstd[11] type-generic interfaces for point-to-point synchronization. + +The following list describes the specific changes in \openshmem[1.4]: +\begin{itemize} +% +\item New communication management API, including \FUNC{shmem\_ctx\_create}; + \FUNC{shmem\_ctx\_destroy}; and additional RMA, AMO, and memory ordering + routines that accept \CTYPE{shmem\_ctx\_t} arguments. +\\See Section \ref{sec:ctx}. +% +\item New API \FUNC{shmem\_sync\_all} and \FUNC{shmem\_sync} to provide \ac{PE} + synchronization without completing pending communication operations. + \\See Sections \ref{subsec:shmem_sync_all} and \ref{subsec:shmem_sync}. +% +\item Clarified that the \openshmem extensions header files are required, even when empty. +\\See Section~\ref{subsec:bindings}. +% +\item Clarified that the \FUNC{SHMEM\_GET64} and \FUNC{SHMEM\_GET64\_NBI} + routines are included in the \Fortran language bindings.\\ + See Sections \ref{subsec:shmem_get} and \ref{subsec:shmem_get_nbi}. +% +\item Clarified that \FUNC{shmem\_init} must be matched with a call to + \FUNC{shmem\_finalize}. +\\See Sections \ref{subsec:shmem_init} and \ref{subsec:shmem_finalize}. +% +\item Added the \CONST{SHMEM\_SYNC\_SIZE} constant. 
+\\See Section \ref{subsec:library_constants}. +% +\item Added type-generic interfaces for \FUNC{shmem\_wait\_until}. +\\ See Section \ref{subsec:shmem_wait_until}. +% +\item Removed the \VAR{volatile} qualifiers from the \VAR{ivar} arguments to +\FUNC{shmem\_wait} routines and the \VAR{lock} arguments in the lock API. +\emph{Rationale: Volatile qualifiers were added to several API routines in +\openshmem[1.3]; however, they were later found to be unnecessary.} +\\ See Sections \ref{subsec:shmem_wait_until} and \ref{subsec:shmem_lock}. +% +\item Deprecated the \VAR{SMA\_}* environment variables and added equivalent +\VAR{SHMEM\_}* environment variables. +\\ See Section \ref{subsec:environment_variables}. +% +\item Added the \Cstd[11] \CTYPE{\_Noreturn} function specifier to +\FUNC{shmem\_global\_exit}. +\\ See Section \ref{subsec:shmem_global_exit}. +% +\item Clarified ordering semantics of memory ordering, point-to-point synchronization, and collective +synchronization routines. +% +\item Clarified deprecation overview and added deprecation rationale in Annex F. +\\See Section \ref{sec:dep_api}. +% +\item Deprecated header directory \HEADER{mpp}. +\\See Section \ref{sec:dep_api}. +% +\item Deprecated the \FUNC{shmem\_wait} functions and the \CTYPE{long}-typed \CorCpp \FUNC{shmem\_wait\_until} function. +\\ See Section \ref{subsec:p2p_intro}. +% +\item Added the \FUNC{shmem\_test} functions. +\\ See Section \ref{subsec:p2p_intro}. +% +\item Added the \FUNC{shmem\_calloc} function. +\\ See Section \ref{subsec:shmem_calloc}. +% +\item Introduced the thread safe semantics that define the interaction between + \openshmem routines and user threads. +\\See Section \ref{subsec:thread_support}. +% +\item Added the new routine \FUNC{shmem\_init\_thread} to initialize the + \openshmem library with one of the defined thread levels. +\\See Section \ref{subsec:shmem_init_thread}. 
+% +\item Added the new routine \FUNC{shmem\_query\_thread} to query the thread + level provided by the \openshmem implementation. +\\See Section \ref{subsec:shmem_query_thread}. +% +\item Clarified the semantics of \FUNC{shmem\_quiet} for a multithreaded + \openshmem \ac{PE}. +\\See Section \ref{subsec:shmem_quiet}. +% +\item Revised the description of \FUNC{shmem\_barrier\_all} for a multithreaded + \openshmem \ac{PE}. +\\See Section \ref{subsec:shmem_barrier_all}. +% +\item Revised the description of \FUNC{shmem\_wait} for a multithreaded + \openshmem \ac{PE}. +\\See Section \ref{subsec:shmem_wait_until}. +% +\item Clarified description for \CONST{SHMEM\_VENDOR\_STRING}. +\\See Section \ref{subsec:library_constants}. +% +\item Clarified description for \CONST{SHMEM\_MAX\_NAME\_LEN}. +\\See Section \ref{subsec:library_constants}. +% +\item Clarified API description for \FUNC{shmem\_info\_get\_name}. +\\See Section \ref{subsec:shmem_info_get_name}. +% +\item Expanded the type support for RMA, AMO, and point-to-point + synchronization operations. +\\ See Tables \ref{stdrmatypes}, \ref{stdamotypes}, \ref{extamotypes}, and + \ref{p2psynctypes}. +% +\item Renamed AMO operations to use \FUNC{shmem\_atomic\_*} prefix and + deprecated old AMO routines. +\\ See Section \ref{sec:amo}. +% +\item Added fetching and non-fetching bitwise AND, OR, and XOR atomic + operations. +\\ See Section \ref{sec:amo}. +% +\item Deprecated the entire \Fortran API. +% +\item Replaced the \CTYPE{complex} macro in complex-typed reductions with the + \Cstd[99] (and later) type specifier \CTYPE{\_Complex} to remove an + implicit dependence on \HEADER{complex.h}. +\\ See Section \ref{subsec:shmem_reductions}. +% +\item Clarified that complex-typed reductions in C are optionally supported. +\\ See Section \ref{subsec:shmem_reductions}. +% +\end{itemize} + -\section{Version 1.3} -This section summarizes the changes from the \openshmem specification Version -1.2 to Version 1.3.
Many major changes to the specification was introduced in Version 1.3. This includes non-blocking RMA operations, -generic interfaces for various OpenSHMEM interfaces, atomic \FUNC{Put} and \FUNC{Get} operations, and Alltoall interfaces. -The following list describes the specific changes in 1.3: +\section{Version 1.3} +Major changes in \openshmem[1.3] include the addition of +nonblocking \ac{RMA} operations, +atomic \PUT{} and \GET{} operations, +all-to-all collectives, +and \Cstd[11] type-generic interfaces for \ac{RMA} and \ac{AMO} operations. +The following list describes the specific changes in \openshmem[1.3]: \begin{itemize} % -\item Clarified implementation of \ac{PE}s as threads. +\item Clarified implementation of \acp{PE} as threads. % -\item Added \textbf{const} to every read-only pointer argument. +\item Added \CTYPE{const} to every read-only pointer argument. % \item Clarified definition of \OPR{Fence}. \\See Section \ref{subsec:programming_model}. @@ -379,24 +710,24 @@ \section{Version 1.3} \\See Section \ref{subsec:library_constants}. % \item Added a type-generic interface to \openshmem \ac{RMA} and \ac{AMO} - operations based on \Celev{} Generics. + operations based on \Cstd[11] Generics. \\See Sections \ref{sec:rma}, \ref{sec:rma_nbi} and \ref{sec:amo}. % -\item New non-blocking variants of remote memory access, \FUNC{SHMEM\_PUT\_NBI} +\item New nonblocking variants of remote memory access, \FUNC{SHMEM\_PUT\_NBI} and \FUNC{SHMEM\_GET\_NBI}. \\See Sections \ref{subsec:shmem_put_nbi} and \ref{subsec:shmem_get_nbi}. % \item New atomic elemental read and write operations, \FUNC{SHMEM\_FETCH} and \FUNC{SHMEM\_SET}. -\\See Sections \ref{subsec:shmem_fetch} and \ref{subsec:shmem_set} +\\See Sections \ref{subsec:shmem_atomic_fetch} and \ref{subsec:shmem_atomic_set} % \item New alltoall data exchange operations, \FUNC{SHMEM\_ALLTOALL} and \FUNC{SHMEM\_ALLTOALLS}. \\See Sections \ref{subsec:shmem_alltoall} and \ref{subsec:shmem_alltoalls}. 
% -\item Added \textbf{volatile} to remotely accessible pointer argument in +\item Added \CTYPE{volatile} to remotely accessible pointer argument in \FUNC{SHMEM\_WAIT} and \FUNC{SHMEM\_LOCK}. -\\See Sections \ref{subsec:shmem_wait} and \ref{subsec:shmem_lock}. +\\See Sections \ref{subsec:shmem_wait_until} and \ref{subsec:shmem_lock}. % \item Deprecation of \FUNC{SHMEM\_CACHE}. \\See Section \ref{subsec:shmem_cache}. @@ -407,23 +738,23 @@ \section{Version 1.3} \section{Version 1.2} -This section summarizes the changes from the \openshmem specification Version -1.1 to Version 1.2. A major change in this version is that it improves upon the -execution model described in 1.1 by introducing an explicit -\FUNC{shmem\_finalize} library call. This provides a collective mechanism of -exiting an \openshmem program and releasing resources used by the library. - - -The following list describes the specific changes in 1.2: - +Major changes in \openshmem[1.2] include +a new initialization routine (\FUNC{shmem\_init}), +improvements to the execution model with an explicit +library-finalization routine (\FUNC{shmem\_finalize}), +an early-exit routine (\FUNC{shmem\_global\_exit}), +namespace standardization, +and clarifications to several API descriptions. + +The following list describes the specific changes in \openshmem[1.2]: \begin{itemize} % \item Added specification of \VAR{pSync} initialization for all routines that use it. % \item Replaced all placeholder variable names \VAR{target} with \VAR{dest} to - avoid confusion with Fortran `target' keyword. + avoid confusion with \Fortran's \CONST{target} keyword. % FIXME: Change to \KEYWORD{target} % -\item New Execution Model for exiting/finishing OpenSHMEM programs. +\item New Execution Model for exiting/finishing \openshmem programs. \\See Section \ref{subsec:execution_model}. % \item New library constants to support API that query version and name information. 
@@ -451,24 +782,24 @@ \section{Version 1.2} \item New API to query the version and name information. \\See Section \ref{subsec:shmem_info_get_version} and \ref{subsec:shmem_info_get_name}. % -\item \openshmem library API normalization. All C symmetric memory management +\item \openshmem library API normalization. All \Cstd symmetric memory management API begins with \FUNC{shmem\_}. \\See Section \ref{subsec:shfree}. % \item Notes and clarifications added to \FUNC{shmem\_malloc}. \\See Section \ref{subsec:shfree}. % -\item Deprecation of Fortran API routine \FUNC{SHMEM\_PUT}. +\item Deprecation of \Fortran API routine \FUNC{SHMEM\_PUT}. \\See Section \ref{subsec:shmem_put}. % \item Clarification related to \FUNC{shmem\_wait}. -\\See Section \ref{subsec:shmem_wait}. +\\See Section \ref{subsec:shmem_wait_until}. % \item Undefined behavior for null pointers without zero counts added. \\See Annex \ref{sec:undefined} % \item Addition of new Annex for clearly specifying deprecated API and its - support in the existing specification version. + support across versions of the \openshmem Specification. \\See Annex \ref{sec:dep_api}. % \end{itemize} @@ -477,18 +808,14 @@ \section{Version 1.2} \section{Version 1.1} -This section summarizes the changes from the \openshmem specification Version -1.0 to the Version 1.1. A major change in this version is that it provides an -accurate description of \openshmem interfaces so that they are in agreement with -the SGI specification. This version also explains \openshmem’s programming, -memory, and execution model. The document was thoroughly changed to improve the -readability of specification and usability of interfaces. The code examples -were added to demonstrate the usability of API. Additionally, diagrams were -added to help understand the subtle semantic differences of various operations. 
- - -The following list describes the specific changes in 1.1: - +Major changes from \openshmem[1.0] to \openshmem[1.1] include +the introduction of the \HEADER{shmemx.h} header file for non-standard API +extensions, +clarifications to completion semantics and API descriptions in agreement with +the \ac{SGI} SHMEM specification, +and general readability and usability improvements to the document structure. + +The following list describes the specific changes in \openshmem[1.1]: \begin{itemize} % \item Clarifications of the completion semantics of memory synchronization @@ -524,9 +851,9 @@ \section{Version 1.1} % \item Added examples to the different interfaces. % -\item Clarification of the naming conventions for constant in \Clang{} and - \Fortran{}. -\\See Section \ref{subsec:library_constants} and \ref{subsec:shmem_wait}. +\item Clarification of the naming conventions for constants in \Cstd and + \Fortran. +\\See Section \ref{subsec:library_constants} and \ref{subsec:shmem_wait_until}. % \item Added \ac{API} calls: \FUNC{shmem\_char\_p}, \FUNC{shmem\_char\_g}. \\See Sections \ref{subsec:shmem_p} and \ref{subsec:shmem_g}. @@ -535,7 +862,7 @@ \section{Version 1.1} \FUNC{shmem\_char\_get}. \\See Sections \ref{subsec:shmem_put} and \ref{subsec:shmem_get}. % -\item The usage of \VAR{ptrdiff\_t}, \VAR{size\_t}, and \VAR{int} in the +\item The usage of \CTYPE{ptrdiff\_t}, \CTYPE{size\_t}, and \CTYPE{int} in the interface signature was made consistent with the description. \\See Sections \ref{subsec:coll}, \ref{subsec:shmem_iput}, and \ref{subsec:shmem_iget}. % @@ -546,19 +873,22 @@ \section{Version 1.1} \FUNC{shmem\_barrier}.\\ See Section \ref{subsec:shmem_barrier}. % \item Clarification of the expected behavior when multiple \FUNC{start\_pes} -calls are encountered has been clarified. +calls are encountered. \\See Section \ref{subsec:start_pes}. % \item Corrected the definition of atomic increment operation. -\\See Section \ref{subsec:shmem_inc}.
+\\See Section \ref{subsec:shmem_atomic_inc}. % \item Clarification of the size of the symmetric heap and when it is set. \\See Section \ref{subsec:shfree}. % -\item Clarification of the integer and real sizes for \Fortran{} \ac{API}. -\\See Sections \ref{subsec:shmem_add}, \ref{subsec:shmem_cswap}, - \ref{subsec:shmem_swap}, \ref{subsec:shmem_finc}, \ref{subsec:shmem_inc}, and - \ref{subsec:shmem_fadd}. +\item Clarification of the integer and real sizes for \Fortran \ac{API}. +\\See Sections \ref{subsec:shmem_atomic_add}, + \ref{subsec:shmem_atomic_compare_swap}, + \ref{subsec:shmem_atomic_swap}, + \ref{subsec:shmem_atomic_fetch_inc}, + \ref{subsec:shmem_atomic_inc}, and + \ref{subsec:shmem_atomic_fetch_add}. % \item Clarification of the expected behavior on program \OPR{exit}. \\See Section \ref{subsec:execution_model}, Execution Model. @@ -568,10 +898,10 @@ \section{Version 1.1} \\See Section \ref{subsec:progress}. % \item Clarification of naming convention for non-standard interfaces and their -inclusion in \FUNC{shmemx.h}. +inclusion in \HEADER{shmemx.h}. \\See Section \ref{subsec:bindings}. % -\item Various fixes to \openshmem code examples across the specification to +\item Various fixes to \openshmem code examples across the Specification to include appropriate header files. % \item Removing requirement that implementations should detect size mismatch and @@ -579,21 +909,17 @@ \section{Version 1.1} language. \\See Sections \ref{subsec:shfree} and Annex \ref{sec:undefined}. % -\item Fortran programming fixes for examples.\\ See Sections -\ref{subsec:shmem_reductions} and \ref{subsec:shmem_wait}. +\item \Fortran programming fixes for examples.\\ See Sections +\ref{subsec:shmem_reductions} and \ref{subsec:shmem_wait_until}. % \item Clarifications of the reuse \VAR{pSync} and \VAR{pWork} across collectives. \\See Sections \ref{subsec:coll}, \ref{subsec:shmem_broadcast}, \ref{subsec:shmem_collect} and \ref{subsec:shmem_reductions}. 
% -\item Name changes for UV and ICE for SGI systems. +\item Name changes for UV and ICE for \ac{SGI} systems. \\See Annex \ref{sec:openshmem_history}. % \end{itemize} - - - - } %end of setlength command that was started in frontmatter.tex diff --git a/content/collective_intro.tex b/content/collective_intro.tex index f14e5856e..b58e0e9a4 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -1,39 +1,43 @@ \emph{Collective routines} are defined as communication or synchronization -operations on a group of \acp{PE} called an \activeset. The collective -routines require all \acp{PE} in the \activeset to simultaneously call the -routine. A \ac{PE} that is not part of the \activeset calling the collective -routines results in an undefined behavior. All collective routines have an -\activeset as an input parameter except \barrierall. The \barrierall is -called by all \acp{PE} of the \openshmem program. +operations on a group of \acp{PE} called an active set. The collective +routines require all \acp{PE} in the active set to simultaneously call the +routine. A \ac{PE} that is not in the active set calling the collective +routine results in undefined behavior. All collective routines have an +active set as an input parameter except \FUNC{shmem\_barrier\_all} and +\FUNC{shmem\_sync\_all}. Both \FUNC{shmem\_barrier\_all} and +\FUNC{shmem\_sync\_all} must be called by all \acp{PE} of the \openshmem program. -The \activeset is defined by the arguments \VAR{PE\_start}, \VAR{logPE\_stride}, +The active set is defined by the arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}. \VAR{PE\_start} is the starting \ac{PE} number, a log (base 2) of \VAR{logPE\_stride} is the stride between \acp{PE}, and \VAR{PE\_size} is -the number of \acp{PE} participating in the \activeset. All \acp{PE} -participating in the collective routines provide the same values for these -arguments. - +the number of \acp{PE} participating in the active set. 
All \acp{PE} +participating in the collective routine must provide the same values for these +arguments. + Another argument important to collective routines is \VAR{pSync}, which is a symmetric work array. All \acp{PE} participating in a collective must pass the same \VAR{pSync} array. On completion of a collective call, the \VAR{pSync} is restored to its original contents. The user is permitted to reuse a \VAR{pSync} array if all previous collective routines using the \VAR{pSync} array have been completed by all participating \acp{PE}. One can use a synchronization -collective routine such as \barrier to ensure completion of previous collective -routines. The \FUNC{shmem\_barrier} routine allows the same \VAR{pSync} array to -be used on consecutive calls as long as the \ac{PE} \activeset does not change. +collective routine such as \FUNC{shmem\_barrier} to ensure completion of previous collective +routines. The \FUNC{shmem\_barrier} and \FUNC{shmem\_sync} routines allow the same +\VAR{pSync} array to be used on consecutive calls as long as the \acp{PE} +in the active set do not change. -All collective routines defined in the specification are blocking. The +All collective routines defined in the Specification are blocking. The collective routines return on completion. 
The collective routines defined in -the \openshmem specification are: +the \openshmem Specification are: \begin{itemize} -\item[] \broadcast -\item[] \barrier -\item[] \barrierall -\item[] \collect -\item[] \fcollect -\item[] \reduction -\item[] \alltoall -\item[] \alltoalls -\end{itemize} +\item \FUNC{shmem\_barrier\_all} +\item \FUNC{shmem\_barrier} +\item \FUNC{shmem\_sync\_all} +\item \FUNC{shmem\_sync} +\item \FUNC{shmem\_broadcast\{32, 64\}} +\item \FUNC{shmem\_collect\{32, 64\}} +\item \FUNC{shmem\_fcollect\{32, 64\}} +\item Reductions for the following operations: AND, MAX, MIN, SUM, PROD, OR, XOR +\item \FUNC{shmem\_alltoall\{32, 64\}} +\item \FUNC{shmem\_alltoalls\{32, 64\}} +\end{itemize} diff --git a/content/coverpage.tex b/content/coverpage.tex index a46704771..d7692e386 100644 --- a/content/coverpage.tex +++ b/content/coverpage.tex @@ -25,13 +25,15 @@ \vfill{} -\section*{Developed by} +\section*{Development by} \begin{itemize} -\item High Performance Computing Tools group at the University of Houston\\ - \url{http://www.cs.uh.edu/~hpctools/} -\item Extreme Scale Systems Center, Oak Ridge National Laboratory\\ - \url{http://www.csm.ornl.gov/essc/} +\item For a current list of contributors and collaborators please see\\ + \url{http://www.openshmem.org/site/Contributors/} +\item For a current list of OpenSHMEM implementations and tools, please see\\ + \url{http://openshmem.org/site/Links#impl/} + \end{itemize} + \pagebreak{} \section*{Sponsored by} @@ -39,49 +41,77 @@ \section*{Sponsored by} \item \ac{DoD}\\ \url{http://www.defense.gov/ } \item \ac{ORNL}\\ - \url{http://www.ornl.gov/} + \url{http://www.ornl.gov/} +\item \ac{LANL}\\ + \url{http://www.lanl.gov/} \end{itemize} -\section*{Authors and Collaborators} +\section*{Current Authors and Collaborators} \begin{itemize} -\item Monika ten Bruggencate, Cray Inc. 
\item Matthew Baker, \ac{ORNL} -\item Barbara Chapman, \ac{UH} -\item Tony Curtis, \ac{UH} -\item Eduardo D'Azevedo, \ac{ORNL} +\item Swen Boehm, \ac{ORNL} +\item Aurelien Bouteiller, \ac{UTK} +\item Barbara Chapman, \ac{SBU} +\item Robert Cernohous, Cray Inc. +\item James Culhane, \ac{LANL} +\item Tony Curtis, \ac{SBU} \item James Dinan, Intel -\item Karl Feind, SGI +\item Mike Dubman, Mellanox +\item Karl Feind, \ac{HPE} \item Manjunath Gorentla Venkata, \ac{ORNL} +\item Max Grossman, Rice University +\item Khaled Hamidouche, \ac{AMD} \item Jeff Hammond, Intel -\item Oscar Hernandez, \ac{ORNL} +\item Yossi Itigin, Mellanox +\item Bryant Lam, \ac{DoD} \item David Knaak, Cray Inc. -\item Gregory Koenig, \ac{ORNL} \item Jeff Kuehn, \ac{LANL} -\item Graham Lopez, \ac{ORNL} \item Jens Manser, \ac{DoD} \item Tiffany M. Mintz, \ac{ORNL} +\item David Ozog, Intel \item Nicholas Park, \ac{DoD} -\item Steve Poole, OSSS -\item Wendy Poole, OSSS +\item Steve Poole, \ac{OSSS} +\item Wendy Poole, \ac{OSSS} \item Swaroop Pophale, \ac{ORNL} -\item Michael Raymond, SGI -\item Pavel Shamis, \ac{ORNL} +\item Sreeram Potluri, NVIDIA +\item Howard Pritchard, \ac{LANL} +\item Naveen Ravichandrasekaran, Cray Inc. +\item Michael Raymond, \ac{HPE} +\item James Ross, \ac{ARL} +\item Pavel Shamis, ARM Inc. \item Sameer Shende, \ac{UO} \item Lauren Smith, \ac{DoD} + +\end{itemize} + +\section*{Alumni Authors and Collaborators} +\begin{itemize} +\item Amrita Banerjee, \ac{UH} +\item Monika ten Bruggencate, Cray Inc. +\item Eduardo D'Azevedo, \ac{ORNL} +\item Oscar Hernandez, \ac{ORNL} +\item Gregory Koenig, \ac{ORNL} +\item Graham Lopez, \ac{ORNL} +\item Ricardo Mauricio, \ac{UH} +\item Ram Nanjegowda, \ac{UH} \item Aaron Welch, \ac{ORNL} \end{itemize} \date{\today} -\section*{Acknowledgements} +\section*{Acknowledgments} The \openshmem specification belongs to Open Source Software Solutions, Inc. -(OSSS), a non-profit organization, under an agreement with SGI. 
The development -work of the specification is supported by the Oak Ridge National Laboratory -Extreme Scale Systems Center and the Department of Defense.\\ +(OSSS), a non-profit organization, under an agreement with HPE. For a current list +of Contributors and Collaborators, please see + \url{http://www.openshmem.org/site/Contributors/}. +We gratefully acknowledge support from +Oak Ridge National Laboratory's +Extreme Scale Systems Center and the continuing support of the Department of Defense.\\ \\ We would also like to acknowledge the contribution of the members of the \openshmem mailing list for their ideas, discussions, suggestions, and -constructive criticism which has helped us improve this document. - - +constructive criticism which has helped us improve this document.\\ +\\ +\openshmem[1.4] is dedicated to the memory of David Charles Knaak. David was a highly involved +colleague and contributor to the entire OpenSHMEM project. He will be missed. diff --git a/content/environment_variables.tex b/content/environment_variables.tex index 2463a5415..96deaeb92 100644 --- a/content/environment_variables.tex +++ b/content/environment_variables.tex @@ -1,27 +1,43 @@ +\TableIndex{Environment Variables} + The \openshmem specification provides a set of environment variables that allows users to configure the \openshmem implementation, and receive information about the implementation. The implementations of the specification are free to define additional variables. Currently, the specification defines four environment -variables. +variables. All environment variables that start with \VAR{SMA\_*} are +deprecated, but currently supported for backwards compatibility. +If both \VAR{SHMEM\_}- and \VAR{SMA\_}-prefixed environment variables +are set, then the value in the \VAR{SHMEM\_}-prefixed environment variable +establishes the controlling value. Refer to the +\hyperref[subsec:deprecate-sma-env]{\VAR{SMA\_*} Environment Variables} +deprecation rationale for more details. 
\medskip{} \begin{tabular}{|l|l|l|} -\hline -Variable & Value & Purpose\tabularnewline -\hline -\hline -\texttt{SMA\_VERSION} & any & print the library version at -start-up\tabularnewline -\hline -\texttt{SMA\_INFO} & any & print helpful text about all these environment -variables\tabularnewline -\hline -\texttt{SMA\_SYMMETRIC\_SIZE} & non-negative integer & number of bytes to -allocate for symmetric heap\tabularnewline -\hline -\texttt{SMA\_DEBUG} & any & enable debugging messages\tabularnewline -\hline +\hline +\textbf{Variable} & \textbf{Value} & \textbf{Description} +\tabularnewline\hline +%% +\EnvVarDecl{SHMEM\_VERSION} + & Any + & Print the library version at start-up + \tabularnewline\hline +%% +\EnvVarDecl{SHMEM\_INFO} + & Any + & Print helpful text about all these environment variables + \tabularnewline\hline +%% +\EnvVarDecl{SHMEM\_SYMMETRIC\_SIZE} + & Non-negative integer + & Number of bytes to allocate for symmetric heap + \tabularnewline\hline +%% +\EnvVarDecl{SHMEM\_DEBUG} + & Any + & Enable debugging messages + \tabularnewline\hline \end{tabular} \medskip{} diff --git a/content/execution_model.tex b/content/execution_model.tex index dab5c6b40..d8d4d2132 100644 --- a/content/execution_model.tex +++ b/content/execution_model.tex @@ -1,37 +1,37 @@ -An \openshmem program consists of a set of \openshmem processes called \ac{PE}s -that execute in a \ac{SPMD}-like model where each \ac{PE} can take a different +An \openshmem program consists of a set of \openshmem processes called \acp{PE} +that execute in an \ac{SPMD}-like model where each \ac{PE} can take a different execution path. For example, a \ac{PE} can be implemented using an OS -process. The \ac{PE}s progress asynchronously, and can communicate/synchronize -via the \openshmem interfaces. 
All \ac{PE}s in an \openshmem program should -start by calling the initialization routine \FUNC{shmem\_init} -\footnote{\textbf{start\_pes} has been deprecated as of Specification 1.2} -before using any of the other \openshmem library routines. An \openshmem -program finishes execution by returning from the main routine or when any PE -calls \FUNC{shmem\_global\_exit}. When returning from main, \openshmem must +process. The \acp{PE} may be either single-threaded or multithreaded. +The \acp{PE} progress asynchronously, and can communicate/synchronize +via the \openshmem interfaces. All \acp{PE} in an \openshmem program should +start by calling the initialization routine \FUNC{shmem\_init}% +\footnote{\FUNC{start\_pes} has been deprecated as of \openshmem[1.2].} +or \FUNC{shmem\_init\_thread} before using any of the other \openshmem library routines. +An \openshmem program concludes its use of the \openshmem library when all \acp{PE} call +\FUNC{shmem\_finalize} or any \ac{PE} calls \FUNC{shmem\_global\_exit}. +During a call to \FUNC{shmem\_finalize}, the \openshmem library must complete all pending communication and release all the resources associated to -the library using an implicit collective synchronization across PEs. The user -has the option to call \FUNC{shmem\_finalize} (before returning from main) to -complete all pending communication and release all the \openshmem library -resources without terminating the program. Calling any \openshmem routine after -\FUNC{shmem\_finalize} leads to undefined behavior. +the library using an implicit collective synchronization across \acp{PE}. +Calling any \openshmem routine after \FUNC{shmem\_finalize} leads to undefined +behavior. -The \ac{PE}s of the \openshmem program are identified by unique integers. The +The \acp{PE} of the \openshmem program are identified by unique integers. The identifiers are integers assigned in a monotonically increasing manner from zero -to the total number of \ac{PE}s minus 1.
\ac{PE} identifiers are used for +to one less than the total number of \acp{PE}. \ac{PE} identifiers are used for \openshmem calls (e.g. to specify \OPR{put} or \OPR{get} routines on symmetric data objects, collective synchronization calls) or to dictate a control flow for -\ac{PE}s using constructs of \Clang{} or \Fortran. The identifiers are fixed for +\acp{PE} using constructs of \Cstd or \Fortran. The identifiers are fixed for the life of the \openshmem program. -\subsection{Progress of \openshmem Operations}\label{subsec:progress} +\subsection{Progress of OpenSHMEM Operations}\label{subsec:progress} The \openshmem model assumes that computation and communication are naturally overlapped. \openshmem programs are expected to exhibit progression of communication both with and without \openshmem calls. Consider a \ac{PE} that is -engaged in a computation with no \openshmem calls. Other \ac{PE}s should be able -to communicate (\OPR{put}, \OPR{get}, \OPR{collective}, \OPR{atomic}, etc) and +engaged in a computation with no \openshmem calls. Other \acp{PE} should be able +to communicate (\OPR{put}, \OPR{get}, \OPR{atomic}, etc.) and complete communication operations with that computationally-bound \ac{PE} -without that \ac{PE} issuing any explicit \openshmem calls. \openshmem +without that \ac{PE} issuing any explicit \openshmem calls. One-sided \openshmem communication calls involving that \ac{PE} should progress regardless of when that \ac{PE} next engages in an \openshmem call. @@ -52,19 +52,3 @@ \subsection{Progress of \openshmem Operations}\label{subsec:progress} provide asynchronous one-sided operations, as these have very limited performance value for \openshmem programs. \end{itemize} - -\subsection{Atomicity Guarantees}\label{subsec:amo_guarantees} - -\openshmem contains a number of routines that operate on symmetric data -atomically (Section \ref{sec:amo}).
These routines guarantee that accesses by -\openshmem's atomic operations with the same datatype will be exclusive, but do not guarantee -exclusivity in combination with other routines, either inside \openshmem or -outside. - -For example: during the execution of an atomic remote integer increment -operation on a symmetric variable \VAR{X}, no other \openshmem atomic operation -may access \VAR{X}. After the increment, \VAR{X} will have increased its value -by \CONST{1} on the destination \ac{PE}, at which point other atomic operations -may then modify that \VAR{X}. However, access to the symmetric object \VAR{X} -with non-atomic operations, such as one-sided \OPR{put} or \OPR{get} operations, -will \OPR{invalidate} the atomicity guarantees. diff --git a/content/frontmatter.tex b/content/frontmatter.tex index a25d056e7..d8510accb 100644 --- a/content/frontmatter.tex +++ b/content/frontmatter.tex @@ -15,7 +15,7 @@ \input{content/coverpage} \setcounter{tocdepth}{4} -\setcounter{secnumdepth}{3} +\setcounter{secnumdepth}{4} \tableofcontents \mainmatter % included for use of documenttype 'book' @@ -34,4 +34,3 @@ %\setlength{\baselineskip}{3pt plus 3pt minus 3pt} \setlength{\parskip}{3pt} - diff --git a/content/language_bindings_and_conformance.tex b/content/language_bindings_and_conformance.tex index 97d9e670b..f1f9f29c5 100644 --- a/content/language_bindings_and_conformance.tex +++ b/content/language_bindings_and_conformance.tex @@ -1,19 +1,28 @@ -\openshmem provides ISO \Clang{} and \Fortran{} \textit{90} language bindings. -Any implementation that provides both \Clang{} and \Fortran{} bindings can claim -conformance to the specification. An implementation that provides e.g.\ only a -\Clang{} interface may claim to conform to the \openshmem specification with -respect to the \Clang{} language, but not to \Fortran, and should make this -clear in its documentation. 
The \openshmem header files for \Clang{} and -\Fortran{} must contain only the interfaces and constant names defined in this +\openshmem provides ISO \Cstd and \Fortran[90] language bindings. +As of \openshmem[1.4], the \Fortran API is deprecated. +For rationale and considerations of +future \Fortran use of \openshmem, see Section~\ref{subsec:deprecate-fortran}. + +Any implementation that provides both \Cstd and \Fortran bindings can claim +conformance to the specification. Alternatively, an implementation may claim +conformance only with respect to one of those languages. For example, +an implementation that provides only a +\Cstd interface may claim to conform to the \openshmem specification with +respect to the \Cstd language, but not to \Fortran, and should make this +clear in its documentation. The \openshmem header files \HEADER{shmem.h} for +\Cstd and \HEADER{shmem.fh} for +\Fortran must contain only the interfaces and constant names defined in this specification. -\openshmem \ac{API}s can be implemented as either routines or macros. However, +\openshmem \acp{API} can be implemented as either routines or macros. However, implementing the interfaces using macros is strongly discouraged as this could severely limit the use of external profiling tools and high-level compiler optimizations. An \openshmem program should avoid defining routine names, -variables, or identifiers with the prefix \shmemprefix (for \Clang{} and -\Fortran), \shmemprefixC (for \Clang) or with \openshmem \ac{API} names. +variables, or identifiers with the prefix \shmemprefix (for \Cstd and +\Fortran), \shmemprefixC (for \Cstd) or with \openshmem \ac{API} names. -All \openshmem extension \ac{API}s that are not part of this specification must -be defined in the \FUNC{shmemx.h} include file. 
These extensions shall use the +All \openshmem extension \acp{API} that are not part of this specification must +be defined in the \HEADER{shmemx.h} and \HEADER{shmemx.fh} include files for +\Cstd and \Fortran language bindings, respectively. These header files +must exist, even if no extensions are provided. Any extensions shall use the \FUNC{shmemx\_} prefix for all routine, variable, and constant names. diff --git a/content/library_constants.tex b/content/library_constants.tex index b3ad2993e..8a1774ac7 100644 --- a/content/library_constants.tex +++ b/content/library_constants.tex @@ -1,125 +1,236 @@ -The constants that start with SHMEM\_* are for both \Fortran{} -and \CorCpp, and they are compile-time constants. -All constants that start with -\_SHMEM\_* are deprecated and provided for backwards compatibility. -\newline -\newline -\begin{tabular}{|p{0.4\textwidth}|p{0.5\textwidth}|} +\TableIndex{Library Constants} +\TableIndex{Constants} + +The \openshmem library provides a set of compile-time constants that may +be used to specify options to API routines, provide implementation-specific +parameters, or return information about the implementation. +All constants that start with \CONST{\_SHMEM\_*} are deprecated, +but provided for backwards compatibility. + +\begin{longtable}{|p{0.45\textwidth}|p{0.5\textwidth}|} \hline \textbf{Constant} & \textbf{Description} -\tabularnewline -\hline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_BCAST\_SYNC\_SIZE}}} -& +\tabularnewline \hline +\endhead +%% +\LibConstDecl[\CorCpp]{SHMEM\_THREAD\_SINGLE} & +The \openshmem thread support level which specifies that the program +must not be multithreaded. +See Section~\ref{subsec:thread_support} for more detail about its use. 
+\tabularnewline \hline +%% +\LibConstDecl[\CorCpp]{SHMEM\_THREAD\_FUNNELED} & +The \openshmem thread support level which specifies that the program +may be multithreaded but must ensure that only the main thread invokes +the \openshmem interfaces. +See Section~\ref{subsec:thread_support} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl[\CorCpp]{SHMEM\_THREAD\_SERIALIZED} & +The \openshmem thread support level which specifies that the program +may be multithreaded but must ensure that the \openshmem interfaces +are not invoked concurrently by multiple threads. +See Section~\ref{subsec:thread_support} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl[\CorCpp]{SHMEM\_THREAD\_MULTIPLE} & +The \openshmem thread support level which specifies that the program +may be multithreaded and any thread may invoke the \openshmem interfaces. +See Section~\ref{subsec:thread_support} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl[\CorCpp]{SHMEM\_CTX\_SERIALIZED} & +The context creation option which specifies that the given context +is shareable but will not be used by multiple threads concurrently. +See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl[\CorCpp]{SHMEM\_CTX\_PRIVATE} & +The context creation option which specifies that the given context +will be used only by the thread that created it. +See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl[\CorCpp]{SHMEM\_CTX\_NOSTORE} & +The context creation option which specifies that quiet and fence operations +performed on the given context are not required to enforce completion and +ordering of memory store operations. +See Section~\ref{subsec:shmem_ctx_create} for more detail about its use. 
+\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_SYNC\_VALUE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_SYNC\_VALUE} +\end{DeprecateBlock} +& +The value used to initialize the elements of \VAR{pSync} arrays. +The value of this constant is implementation specific. +See Section~\ref{subsec:coll} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_SYNC\_SIZE} & +Length of a work array that can be used with any SHMEM collective +communication operation. +Work arrays sized for specific operations may consume less memory. +The value of this constant is implementation specific. +See Section~\ref{subsec:coll} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_BCAST\_SYNC\_SIZE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_BCAST\_SYNC\_SIZE} +\end{DeprecateBlock} +& Length of the \VAR{pSync} arrays needed for broadcast routines. The value -of this constant is implementation specific. Refer to the -\hyperref[subsec:shmem_broadcast]{Broadcast Routines} section under -\hyperref[sec:openshmem_library_api]{Library Routines} for more information -about the usage of this constant. \tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_SYNC\_VALUE}}} -& -The value used to initialize the elements of \VAR{pSync} arrays. The -value of this constant is implementation specific.\tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_REDUCE\_SYNC\_SIZE}}} -& -Length of the work arrays needed for reduction routines. The value -of this constant is implementation specific. 
Refer to the -\hyperref[subsec:shmem_reductions]{Reduction Routines} section under -\hyperref[sec:openshmem_library_api]{Library Routines} for more information -about the usage of this constant.\tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_BARRIER\_SYNC\_SIZE}}} -& -Length of the work array needed for barrier routines. The value -of this constant is implementation specific. Refer to the -\hyperref[subsec:shmem_barrier]{Barrier Synchronization Routines} section under -\hyperref[sec:openshmem_library_api]{Library Routines} -for more information about the usage of this constant.\tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_COLLECT\_SYNC\_SIZE}}} -& -Length of the work array needed for collect routines. The value -of this constant is implementation specific. Refer to the -\hyperref[subsec:shmem_collect]{Collect Routines} section under -\hyperref[sec:openshmem_library_api]{Library Routines} for more information -about the usage of this constant.\tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_ALLTOALL\_SYNC\_SIZE}}} -& -Length of the work array needed for \FUNC{shmem\_alltoall} -routines. The value of this constant is implementation -specific. Refer to the \hyperref[subsec:shmem_alltoall]{Alltoall -routines} sections under \hyperref[sec:openshmem_library_api]{Library Routines} -for more information about the usage of this constant.\tabularnewline -\hline -\end{tabular} - -\begin{tabular}{|p{0.4\textwidth}|p{0.5\textwidth}|} -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_ALLTOALLS\_SYNC\_SIZE}}} -& -Length of the work array needed for \FUNC{shmem\_alltoalls} -routines. The value of this constant is implementation -specific. 
Refer to the \hyperref[subsec:shmem_alltoalls]{Alltoalls -routines} sections under \hyperref[sec:openshmem_library_api]{Library Routines} -for more information about the usage of this constant.\tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}}} -& Minimum length of work arrays used in various collective routines.\tabularnewline -\hline -\vspace{3mm} -%\color{red} -%\vtop{\hbox{} -%\hbox{\hspace*{12mm} \const{}} -%\hbox{} -%\hbox{\hspace*{12mm} \const{}}} -%& \color{red} -%Ticket \#107 \tabularnewline -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_MAJOR\_VERSION}}} -& -Integer representing the major version of \openshmem{} standard in use. \tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_MINOR\_VERSION}}} -& -Integer representing the minor version of \openshmem{} standard in use. \tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_MAX\_NAME\_LEN}}} +of this constant is implementation specific. +See Section~\ref{subsec:shmem_broadcast} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_REDUCE\_SYNC\_SIZE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_REDUCE\_SYNC\_SIZE} +\end{DeprecateBlock} & -Integer representing the length of vendor string. \tabularnewline -\hline -\vspace{3mm} -\vtop{\hbox{\CorCppFor:} -\hbox{\hspace*{12mm} \const{SHMEM\_VENDOR\_STRING}}} -& -String representing the vendor name of length less than -\const{SHMEM\_MAX\_NAME\_LEN}. In Fortran the string must be \const{SHMEM\_MAX\_NAME\_LEN} -and whitespace padded. It can also be equal in length to \const{SHMEM\_MAX\_NAME\_LEN} -since Fortran does not NULL terminate strings. \tabularnewline -\hline +Length of the work arrays needed for reduction routines. +The value of this constant is implementation specific. 
+See Section~\ref{subsec:shmem_reductions} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_BARRIER\_SYNC\_SIZE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_BARRIER\_SYNC\_SIZE} +\end{DeprecateBlock} +& +Length of the work array needed for barrier routines. +The value of this constant is implementation specific. +See Section~\ref{subsec:shmem_barrier} for more detail about its use. -\end{tabular} -\color{black} +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_COLLECT\_SYNC\_SIZE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_COLLECT\_SYNC\_SIZE} +\end{DeprecateBlock} +& +Length of the work array needed for collect routines. +The value of this constant is implementation specific. +See Section~\ref{subsec:shmem_collect} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_ALLTOALL\_SYNC\_SIZE} & +Length of the work array needed for \FUNC{shmem\_alltoall} routines. +The value of this constant is implementation specific. +See Section~\ref{subsec:shmem_alltoall} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_ALLTOALLS\_SYNC\_SIZE} & +Length of the work array needed for \FUNC{shmem\_alltoalls} routines. +The value of this constant is implementation specific. +See Section~\ref{subsec:shmem_alltoalls} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE} +\end{DeprecateBlock} +& +Minimum length of work arrays used in various collective routines. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_MAJOR\_VERSION} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_MAJOR\_VERSION} +\end{DeprecateBlock} +& +Integer representing the major version of \openshmem Specification in use. 
+\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_MINOR\_VERSION} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_MINOR\_VERSION} +\end{DeprecateBlock} +& +Integer representing the minor version of \openshmem Specification in use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_MAX\_NAME\_LEN} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_MAX\_NAME\_LEN} +\end{DeprecateBlock} +& +Integer representing the maximum length of \CONST{SHMEM\_VENDOR\_STRING}. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_VENDOR\_STRING} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_VENDOR\_STRING} +\end{DeprecateBlock} +& +String representing vendor defined information of size at most +\CONST{SHMEM\_MAX\_NAME\_LEN}. +In \CorCpp{}, the string is terminated by a null character. In \Fortran, the +string of size less than \CONST{SHMEM\_MAX\_NAME\_LEN} is padded with blank +characters up to size \CONST{SHMEM\_MAX\_NAME\_LEN}. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CMP\_EQ} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_CMP\_EQ} +\end{DeprecateBlock} +& +An integer constant expression corresponding to the +``equal to'' comparison operation. +See Section~\ref{subsec:p2p_intro} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CMP\_NE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_CMP\_NE} +\end{DeprecateBlock} +& +An integer constant expression corresponding to the +``not equal to'' comparison operation. +See Section~\ref{subsec:p2p_intro} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CMP\_LT} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_CMP\_LT} +\end{DeprecateBlock} +& +An integer constant expression corresponding to the +``less than'' comparison operation. +See Section~\ref{subsec:p2p_intro} for more detail about its use. 
+\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CMP\_LE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_CMP\_LE} +\end{DeprecateBlock} +& +An integer constant expression corresponding to the +``less than or equal to'' comparison operation. +See Section~\ref{subsec:p2p_intro} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CMP\_GT} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_CMP\_GT} +\end{DeprecateBlock} +& +An integer constant expression corresponding to the +``greater than'' comparison operation. +See Section~\ref{subsec:p2p_intro} for more detail about its use. +\tabularnewline \hline +%% +\LibConstDecl{SHMEM\_CMP\_GE} +\begin{DeprecateBlock} + \LibConstDecl[\CorCpp]{\_SHMEM\_CMP\_GE} +\end{DeprecateBlock} +& +An integer constant expression corresponding to the +``greater than or equal to'' comparison operation. +See Section~\ref{subsec:p2p_intro} for more detail about its use. +\tabularnewline \hline +%% +\end{longtable} diff --git a/content/library_handles.tex b/content/library_handles.tex new file mode 100644 index 000000000..63d726182 --- /dev/null +++ b/content/library_handles.tex @@ -0,0 +1,24 @@ +\TableIndex{Library Handles} +\TableIndex{Handles} + +The \openshmem library provides a set of predefined named constant handles. +All named constants can be used in initialization expressions or assignments, +but not necessarily in array declarations or as labels in \Cstd switch statements. +This implies named constants to be link-time but not necessarily compile-time +constants. + +\begin{longtable}{|p{0.45\textwidth}|p{0.5\textwidth}|} +\hline +\textbf{Handle} & \textbf{Description} +\tabularnewline \hline +\endhead +%% +\LibHandleDecl[\CorCpp]{SHMEM\_CTX\_DEFAULT} & +Handle of type \CTYPE{shmem\_ctx\_t} that corresponds to the +default communication context. All point-to-point communication operations +and synchronizations that do not specify a context are performed on the +default context. 
+See Section~\ref{sec:ctx} for more detail about its use. +\tabularnewline \hline +%% +\end{longtable} diff --git a/content/memory_model.tex b/content/memory_model.tex index 4ed391c0c..6dae1efb6 100644 --- a/content/memory_model.tex +++ b/content/memory_model.tex @@ -1,18 +1,18 @@ \begin{figure}[h] \includegraphics[width=0.95\textwidth]{figures/mem_model} -\caption{\OSH Memory Model} +\caption{\openshmem Memory Model} \label{fig:mem_model} \end{figure} % An \openshmem program consists of data objects that are private to each \ac{PE} -and data objects that are remotely accessible by all \ac{PE}s. Private data +and data objects that are remotely accessible by all \acp{PE}. Private data objects are stored in the local memory of each \ac{PE} and can only be accessed -by the \ac{PE} itself; these data objects cannot be accessed by other \ac{PE}s +by the \ac{PE} itself; these data objects cannot be accessed by other \acp{PE} via \openshmem routines. Private data objects follow the memory model of -\Clang{} or \Fortran. Remotely accessible objects, however, can be accessed by -remote \ac{PE}s using \openshmem routines. Remotely accessible data objects are +\Cstd or \Fortran. Remotely accessible objects, however, can be accessed by +remote \acp{PE} using \openshmem routines. Remotely accessible data objects are called \emph{Symmetric Data Objects}. Each symmetric data object has a -corresponding object with the same name, type, and size on all PEs where that object is +corresponding object with the same name, type, and size on all \acp{PE} where that object is accessible via the \openshmem \ac{API}\footnote{For efficiency reasons, the same offset (from an arbitrary memory address) for symmetric data objects might be used on all \acp{PE}. Further discussion about symmetric heap @@ -20,22 +20,29 @@ \ref{subsec:shfree}}. 
(For the definition of what is accessible, see the descriptions for \FUNC{shmem\_pe\_accessible} and \FUNC{shmem\_addr\_accessible} in sections \ref{subsec:shmem_pe_accessible} and -\ref{subsec:shmem_addr_accessible}.) Symmetric data objects accessed via typed -\openshmem interfaces are required to be natural aligned based on their type +\ref{subsec:shmem_addr_accessible}.) Symmetric data objects accessed via typed and +type-generic \openshmem interfaces are required to be naturally aligned based on their type requirements and underlying architecture. In \openshmem the following kinds of data objects are symmetric: % \begin{itemize} - \item \Fortran{} data objects in common blocks or with the SAVE attribute. - These data objects must not be defined in a dynamic shared object (DSO). - \item Global and static \Clang{} and \Cpp variables. These data objects must - not be defined in a DSO. - \item \Fortran{} arrays allocated with \textit{shpalloc} - \item \Clang{} and \Cpp data allocated by \textit{shmem\_malloc} +\item + \begin{deprecate} + \Fortran data objects in common blocks or with the \CTYPE{SAVE} attribute. + These data objects must not be defined in a dynamic shared object (DSO). + \end{deprecate} +\item Global and static \Cstd and \Cpp variables. These data objects must + not be defined in a DSO. +\item + \begin{deprecate} + \Fortran arrays allocated with \FUNC{shpalloc} + \end{deprecate} +\item \Cstd and \Cpp data allocated by \openshmem memory management routines + (Section~\ref{sec:memory_management}) \end{itemize} -\openshmem dynamic memory allocation routines (\textit{shpalloc} and -\textit{shmem\_malloc}) allow collective allocation of \emph{Symmetric Data +\openshmem dynamic memory allocation routines (\FUNC{shpalloc} and +\FUNC{shmem\_malloc}) allow collective allocation of \emph{Symmetric Data Objects} on a special memory region called the \emph{Symmetric Heap}. 
The Symmetric Heap is created during the execution of a program at a memory location determined by the implementation. The Symmetric Heap may reside in different @@ -44,3 +51,19 @@ objects and private data objects when executing an \openshmem program. Symmetric data objects are stored on the symmetric heap or in the global/static memory section of each \ac{PE}. + +\subsection{Atomicity Guarantees}\label{subsec:amo_guarantees} + +\openshmem contains a number of routines that operate on symmetric data +atomically (Section \ref{sec:amo}). These routines guarantee that accesses by +\openshmem's atomic operations with the same datatype will be exclusive, but do not guarantee +exclusivity in combination with other routines, either inside \openshmem or +outside. + +For example: during the execution of an atomic remote integer increment +operation on a symmetric variable \VAR{X}, no other \openshmem atomic operation +may access \VAR{X}. After the increment, \VAR{X} will have increased its value +by \CONST{1} on the destination \ac{PE}, at which point other atomic operations +may then modify that \VAR{X}. However, access to the symmetric object \VAR{X} +with non-atomic operations, such as one-sided \OPR{put} or \OPR{get} operations, +will invalidate the atomicity guarantees. diff --git a/content/p2p_sync_intro.tex b/content/p2p_sync_intro.tex new file mode 100644 index 000000000..8855716ab --- /dev/null +++ b/content/p2p_sync_intro.tex @@ -0,0 +1,69 @@ +The following section discusses \openshmem \acp{API} that provide a mechanism +for synchronization between two \acp{PE} based on the value of a symmetric data +object. +The point-to-point synchronization routines can be used to portably ensure +that memory access operations observe remote updates in the order enforced by +the initiator \ac{PE} using the \FUNC{shmem\_fence} and \FUNC{shmem\_quiet} +routines. 
+ +Where appropriate compiler support is available, \openshmem provides +type-generic point-to-point synchronization interfaces via \Cstd[11] generic +selection. Such type-generic routines are supported for the +``point-to-point synchronization types'' identified in +Table~\ref{p2psynctypes}. + +The point-to-point synchronization types include some of the exact-width +integer types defined in \HEADER{stdint.h} by \Cstd[99]~\S7.18.1.1 and +\Cstd[11]~\S7.20.1.1. When the \Cstd translation environment +does not provide exact-width integer types with \HEADER{stdint.h}, an +\openshmem implementation is not required to provide support for these types. + +\begin{table}[h] + \begin{center} + \begin{tabular}{|l|l|} + \hline + \TYPE & \TYPENAME \\ \hline + short & short \\ \hline + int & int \\ \hline + long & long \\ \hline + long long & longlong \\ \hline + unsigned short & ushort \\ \hline + unsigned int & uint \\ \hline + unsigned long & ulong \\ \hline + unsigned long long & ulonglong \\ \hline + int32\_t & int32 \\ \hline + int64\_t & int64 \\ \hline + uint32\_t & uint32 \\ \hline + uint64\_t & uint64 \\ \hline + size\_t & size \\ \hline + ptrdiff\_t & ptrdiff \\ \hline + \end{tabular} + \TableCaptionRef{Point-to-Point Synchronization Types and Names} + \label{p2psynctypes} + \end{center} +\end{table} + +The point-to-point synchronization interface provides named constants whose +values are integer constant expressions that specify the comparison operators +used by \openshmem synchronization routines. +The constant names and associated operations are +presented in Table~\ref{p2p-consts}. For Fortran, the constant names of +Table~\ref{p2p-consts} shall be identifiers for integer parameters of +default kind corresponding to the associated comparison operation.
+ +\begin{table}[h] + \begin{center} + \begin{tabular}{ll} + \hline + Constant Name & Comparison \\ \hline + \LibConstRef{SHMEM\_CMP\_EQ} & Equal \\ + \LibConstRef{SHMEM\_CMP\_NE} & Not equal \\ + \LibConstRef{SHMEM\_CMP\_GT} & Greater than \\ + \LibConstRef{SHMEM\_CMP\_GE} & Greater than or equal to \\ + \LibConstRef{SHMEM\_CMP\_LT} & Less than \\ + \LibConstRef{SHMEM\_CMP\_LE} & Less than or equal to \\ \hline + \end{tabular} + \TableCaptionRef{Point-to-Point Comparison Constants} + \label{p2p-consts} + \end{center} +\end{table} diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index 24eaec750..9ddfdb48b 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -1,10 +1,10 @@ \openshmem implements \ac{PGAS} by defining remotely accessible data objects as -mechanisms to share information among \openshmem processes or \acp{PE} and -private data objects that are accessible by the \ac{PE} itself. The \ac{API} +mechanisms to share information among \openshmem processes or \acp{PE}, and +private data objects that are accessible by only the \ac{PE} itself. The \ac{API} allows communication and synchronization operations on both private (local to the PE initiating the operation) and remotely accessible data objects. The key feature of \openshmem is that data transfer operations are -\textit{\textbf{one-sided}} in nature. This means that a local \ac{PE} executing +\emph{one-sided} in nature. This means that a local \ac{PE} executing a data transfer routine does not require the participation of the remote \ac{PE} to complete the routine. This allows for overlap between communication and computation to hide data transfer latencies, which makes \openshmem ideal for @@ -12,15 +12,17 @@ library routines have the potential to provide a low-latency, high-bandwidth communication \ac{API} for use in highly parallelized scalable programs. 
-The \openshmem{} interfaces can be used to implement \ac{SPMD} style programs. -It provides interfaces to start the \openshmem{} \ac{PE}s in parallel, and +The \openshmem interfaces can be used to implement \ac{SPMD} style programs. +It provides interfaces to start the \openshmem \acp{PE} in parallel and communication and synchronization interfaces to access remotely accessible data -objects across \ac{PE}s. These interfaces can be leveraged to divide a problem +objects across \acp{PE}. These interfaces can be leveraged to divide a problem into multiple sub-problems that can be solved independently or with coordination using the communication and synchronization interfaces. The \openshmem specification defines library calls, constants, variables, and language bindings -for \Clang{} and \Fortran{}. The \Cpp{} interface is currently the same as that -for \Clang. Unlike UPC, Fortran 2008, Titanium, X10 and Chapel, which are all +for \Cstd and \Fortran% +\footnote{As of \openshmem[1.4], the \Fortran interface has been deprecated.}. +The \Cpp interface is currently the same as that +for \Cstd. Unlike Unified Parallel C, \Fortran[2008], Titanium, X10, and Chapel, which are all PGAS languages, \openshmem relies on the user to use the library calls to implement the correct semantics of its programming model. @@ -30,7 +32,8 @@ \item \textbf{Library Setup and Query} \begin{enumerate} - \item \OPR{Initialization}: The \openshmem library environment is initialized. + \item \OPR{Initialization}: The \openshmem library environment is initialized, + where the \acp{PE} are either single or multithreaded. \item \OPR{Query}: The local \ac{PE} may get the number of \acp{PE} running the same program and its unique integer identifier. 
\item \OPR{Accessibility}: The local \ac{PE} can find out if a remote \ac{PE} is @@ -41,14 +44,22 @@ \item \textbf{Symmetric Data Object Management} \begin{enumerate} - \item \OPR{Allocation}: All executing \ac{PE}s must participate in the + \item \OPR{Allocation}: All executing \acp{PE} must participate in the allocation of a symmetric data object with identical arguments. - \item \OPR{Deallocation}: All executing \ac{PE}s must participate in the + \item \OPR{Deallocation}: All executing \acp{PE} must participate in the deallocation of the same symmetric data object with identical arguments. - \item \OPR{Reallocation}: All executing \ac{PE}s must participate in the + \item \OPR{Reallocation}: All executing \acp{PE} must participate in the reallocation of the same symmetric data object with identical arguments. \end{enumerate} +\item \textbf{Communication Management} +\begin{enumerate} + \item \OPR{Contexts}: Contexts are containers for communication operations. + Each context provides an environment where the operations performed on + that context are ordered and completed independently of other operations + performed by the application. +\end{enumerate} + \item \textbf{Remote Memory Access} \begin{enumerate} \item \PUT: The local \ac{PE} specifies the \source{} data object (private @@ -68,6 +79,9 @@ symmetric data object on the remote \ac{PE}. \item \OPR{Add}: The \ac{PE} initiating the add specifies the value to be added to the symmetric data object on the remote \ac{PE}. + \item \OPR{Bitwise Operations}: The \ac{PE} initiating the bitwise + operation specifies the operand value to the bitwise operation to be + performed on the symmetric data object on the remote \ac{PE}. \item \OPR{Compare and Swap}: The \ac{PE} initiating the swap gets the old value of the symmetric data object based on a value to be compared and copies a new value to the symmetric data object on the remote \ac{PE}. 
@@ -77,6 +91,10 @@ \item \OPR{Fetch and Add}: The \ac{PE} initiating the add specifies the value to be added to the symmetric data object on the remote \ac{PE} and returns with the old value. + \item \OPR{Fetch and Bitwise Operations}: The \ac{PE} initiating the bitwise + operation specifies the operand value to the bitwise operation to be + performed on the symmetric data object on the remote \ac{PE} + and returns the old value. \end{enumerate} \item \textbf{Synchronization and Ordering} @@ -85,16 +103,16 @@ \PUT, AMO, and memory store operations to symmetric data objects with respect to a specific destination \ac{PE}. - \item \OPR{Quiet}: The \ac{PE} calling quiet ensures completion of remote access + \item \OPR{Quiet}: The \ac{PE} calling quiet ensures remote completion of remote access operations and stores to symmetric data objects. - \item \OPR{Barrier}: All or some \ac{PE}s collectively synchronize and ensure + \item \OPR{Barrier}: All or some \acp{PE} collectively synchronize and ensure completion of all remote and local updates prior to any \ac{PE} returning from the call. \end{enumerate} \item \textbf{Collective Communication} \begin{enumerate} - \item \OPR{Broadcast}: The \textit{root} \ac{PE} specifies a symmetric data + \item \OPR{Broadcast}: The \VAR{root} \ac{PE} specifies a symmetric data object to be copied to a symmetric data object on one or more remote \acp{PE} (not including itself). \item \OPR{Collection}: All \acp{PE} participating in the routine get the result @@ -102,22 +120,28 @@ another symmetric data object. \item \OPR{Reduction}: All \acp{PE} participating in the routine get the result of an associative binary routine over elements of the specified symmetric - data object on another symmetric data object. + data object on another symmetric data object. + \item \OPR{All-to-All}: All \acp{PE} participating in the routine exchange + a fixed amount of contiguous or strided data with all other \acp{PE} + in the active set. 
\end{enumerate} \item \textbf{Mutual Exclusion} \begin{enumerate} \item \OPR{Set Lock}: The \ac{PE} acquires exclusive access to the region - bounded by the symmetric \textit{lock} variable. - \item \OPR{Test Lock}: The \ac{PE} tests the symmetric \textit{lock} variable + bounded by the symmetric \VAR{lock} variable. + \item \OPR{Test Lock}: The \ac{PE} tests the symmetric \VAR{lock} variable for availability. \item \OPR{Clear Lock}: The \ac{PE} which has previously acquired the - \textit{lock} releases it. + \VAR{lock} releases it. \end{enumerate} -\item \textbf{Data Cache Control \textit{(deprecated)}} +\begin{DeprecateBlock} +\item \textbf{Data Cache Control} \begin{enumerate} \item Implementation of mechanisms to exploit the capabilities of hardware cache if available. \end{enumerate} +\end{DeprecateBlock} + \end{enumerate} diff --git a/content/rma_intro.tex b/content/rma_intro.tex index c06a23f93..eea89511c 100644 --- a/content/rma_intro.tex +++ b/content/rma_intro.tex @@ -1,48 +1,62 @@ The \ac{RMA} routines described in this section are one-sided communication -mechanisms of the \openshmem{} \ac{API}. While using these mechanisms, the user +mechanisms of the \openshmem \ac{API}. While using these mechanisms, the user is required to provide parameters only on the calling side. A characteristic of one-sided communication is that it decouples communication from the synchronization. One-sided communication mechanisms transfer the data but do not synchronize the sender of the data with the receiver of the data. -\openshmem{} \ac{RMA} routines are all performed on the symmetric objects. The +\openshmem \ac{RMA} routines are all performed on the symmetric objects. The initiator \ac{PE} of the call is designated as \source{}, and the \ac{PE} in which memory is accessed is designated as \dest{}. In the case of the remote update routine, \PUT{}, the origin is the \source{} \ac{PE} and the destination \ac{PE} is the \dest{} PE. 
In the case of the remote read routine, \GET{}, the origin is the \dest{} \ac{PE} and the destination is the \source{} \ac{PE}. -Where appropriate compiler support is available, \openshmem{} provides type-generic -one-sided communication interfaces via \Celev{} generic selection -(\Celev{} \S6.5.1.1\footnote{Formally, the \Celev{} specification is ISO/IEC 9899:2011(E).}) +Where appropriate compiler support is available, \openshmem provides type-generic +one-sided communication interfaces via \Cstd[11] generic selection +(\Cstd[11]~\S6.5.1.1\footnote{Formally, the \Cstd[11] specification is ISO/IEC 9899:2011(E).}) for block, scalar, and block-strided put and get communication. -Such type-generic routines are supported for the ``standard \ac{RMA} types’’ -identified in Table \ref{stdrmatypes}. +Such type-generic routines are supported for the ``standard \ac{RMA} types'' +listed in Table \ref{stdrmatypes}. + +The standard \ac{RMA} types include the exact-width integer types defined in +\HEADER{stdint.h} by \Cstd[99]% +\footnote{Formally, the \Cstd[99] specification is ISO/IEC~9899:1999(E).}% +~\S7.18.1.1 and \Cstd[11]~\S7.20.1.1. When the \Cstd translation environment +does not provide exact-width integer types with \HEADER{stdint.h}, an +\openshmem implementation is not required to provide support for these types. 
\begin{table}[h] \begin{center} \begin{tabular}{|l|l|} \hline - \TYPE & \TYPENAME\\ - \hline - float & float\\ - \hline - double & double\\ - \hline - long double & longdouble\\ - \hline - char & char\\ - \hline - short & short\\ - \hline - int & int\\ - \hline - long & long\\ - \hline - long long & longlong\\ - \hline + \TYPE & \TYPENAME \\ \hline + float & float \\ \hline + double & double \\ \hline + long double & longdouble \\ \hline + char & char \\ \hline + signed char & schar \\ \hline + short & short \\ \hline + int & int \\ \hline + long & long \\ \hline + long long & longlong \\ \hline + unsigned char & uchar \\ \hline + unsigned short & ushort \\ \hline + unsigned int & uint \\ \hline + unsigned long & ulong \\ \hline + unsigned long long & ulonglong \\ \hline + int8\_t & int8 \\ \hline + int16\_t & int16 \\ \hline + int32\_t & int32 \\ \hline + int64\_t & int64 \\ \hline + uint8\_t & uint8 \\ \hline + uint16\_t & uint16 \\ \hline + uint32\_t & uint32 \\ \hline + uint64\_t & uint64 \\ \hline + size\_t & size \\ \hline + ptrdiff\_t & ptrdiff \\ \hline \end{tabular} - \caption{Standard \ac{RMA} Types and Names} + \TableCaptionRef{Standard \ac{RMA} Types and Names} \label{stdrmatypes} \end{center} \end{table} diff --git a/content/shmem_add.tex b/content/shmem_add.tex deleted file mode 100644 index fb15fca2c..000000000 --- a/content/shmem_add.tex +++ /dev/null @@ -1,68 +0,0 @@ -\apisummary{ - Performs an atomic add operation on a remote symmetric data object. -} - -\begin{apidefinition} - -\begin{C11synopsis} -void shmem_add(TYPE *dest, TYPE value, int pe); -\end{C11synopsis} -where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. - -\begin{Csynopsis} -void shmem__add(TYPE *dest, TYPE value, int pe); -\end{Csynopsis} -where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. 
- -\begin{Fsynopsis} -INTEGER pe -INTEGER*4 value_i4 -CALL SHMEM_INT4_ADD(dest, value_i4, pe) -INTEGER*8 value_i8 -CALL SHMEM_INT8_ADD(dest, value_i8, pe) -\end{Fsynopsis} - -\begin{apiarguments} - \apiargument{OUT}{dest}{The remotely accessible integer data object to be - updated on the remote \ac{PE}. If you are using \CorCpp, the type of - \dest{} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{value}{The value to be atomically added to \dest. If you - are using \CorCpp, the type of \VAR{value} should match that implied in - the SYNOPSIS section. If you are using \Fortran, it must be of type - integer with an element size of \dest.} - \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which - \dest{} is to be updated. If you are using \Fortran, it must be a default - integer value.} -\end{apiarguments} - -\apidescription{ - The \FUNC{shmem\_add} routine performs an atomic add operation. It adds - \VAR{value} to \dest{} on \ac{PE} \VAR{pe} and atomically updates the \dest{} - without returning the value. - } - -\apidesctable{ - If you are using \Fortran, \VAR{dest} must be of the following type: -}{Routine}{Data type of \VAR{dest}} - -\apitablerow{SHMEM\_INT4\_ADD}{\CONST{4}-byte integer} -\apitablerow{SHMEM\_INT8\_ADD}{\CONST{8}-byte integer} - -\apireturnvalues{ - None. -} - -\apinotes{ - The term remotely accessible is defined in Section \ref{subsec:memory_model}. -} - -\begin{apiexamples} - -\apicexample - {} - {./example_code/shmem_add_example.c} - {} - -\end{apiexamples} - -\end{apidefinition} diff --git a/content/shmem_addr_accessible.tex b/content/shmem_addr_accessible.tex index ad84a36f8..81e4d78dc 100644 --- a/content/shmem_addr_accessible.tex +++ b/content/shmem_addr_accessible.tex @@ -1,18 +1,18 @@ \apisummary{ - Determines whether an address is accessible via OpenSHMEM data transfer - routines from the specified remote \ac{PE}. 
+ Determines whether an address is accessible via \openshmem data transfer + routines from the specified remote \ac{PE}. } \begin{apidefinition} \begin{Csynopsis} -int shmem_addr_accessible(const void *addr, int pe); +int @\FuncDecl{shmem\_addr\_accessible}@(const void *addr, int pe); \end{Csynopsis} \begin{Fsynopsis} LOGICAL LOG, SHMEM_ADDR_ACCESSIBLE INTEGER pe -LOG = SHMEM_ADDR_ACCESSIBLE(addr, pe) +LOG = @\FuncDecl{SHMEM\_ADDR\_ACCESSIBLE}@(addr, pe) \end{Fsynopsis} \begin{apiarguments} @@ -25,22 +25,12 @@ address is accessible via \openshmem routines from the specified remote \ac{PE}. This routine verifies that the data object is symmetric and accessible with - respect to a remote \ac{PE} via \openshmem data transfer routines. The + respect to a remote \ac{PE} via \openshmem data transfer routines. The specified address \VAR{addr} is a data object on the local \ac{PE}. - - This routine may be particularly useful for hybrid programming with other - communication libraries (such as \ac{MPI}) or parallel languages. For - example, in SGI Altix series systems, for multiple executable MPI programs that - use \openshmem routines, it is important to note that static memory, such as a - \Fortran{} common block or \Clang{} global variable, is symmetric between - processes running from the same executable file, but is not symmetric between - processes running from different executable files. Data allocated from the - symmetric heap (\FUNC{shmem\_malloc} or \FUNC{shpalloc}) is symmetric across the - same or different executable files. } -\apireturnvalues{ - \CorCpp: The return value is \CONST{1} if \VAR{addr} is a symmetric data object +\apireturnvalues{ + \CorCpp: The return value is \CONST{1} if \VAR{addr} is a symmetric data object and accessible via \openshmem routines from the specified remote \ac{PE}; otherwise, it is \CONST{0}. @@ -48,9 +38,18 @@ object and accessible via \openshmem routines from the specified remote \ac{PE}; otherwise, it is \CONST{.FALSE.}. 
} - + \apinotes{ - None. + This routine may be particularly useful for hybrid programming with other + communication libraries (such as \ac{MPI}) or parallel languages. For + example, when an \ac{MPI} job uses \ac{MPMD} mode, multiple executable + \ac{MPI} programs may use \openshmem routines. In such cases, static + memory, such as a \Fortran common block or \Cstd global variable, is + symmetric between processes running from the same executable file, but is + not symmetric between processes running from different executable files. + Data allocated from the symmetric heap (\FUNC{shmem\_malloc} or + \FUNC{shpalloc}) is symmetric across the same or different executable + files. } \end{apidefinition} diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 19bc3213f..35e3d5108 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -1,55 +1,55 @@ \apisummary{ shmem\_alltoall is a collective routine where each \ac{PE} exchanges a fixed amount of data with all other \acp{PE} in the - \activeset. + active set. 
} \begin{apidefinition} \begin{Csynopsis} -void shmem_alltoall32(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_alltoall64(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_alltoall32}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_alltoall64}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} \begin{Fsynopsis} INTEGER pSync(SHMEM_ALLTOALL_SYNC_SIZE) INTEGER PE_start, logPE_stride, PE_size, nelems -CALL SHMEM_ALLTOALL32(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_ALLTOALL64(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_ALLTOALL32}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_ALLTOALL64}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) \end{Fsynopsis} \begin{apiarguments} \apiargument{OUT}{dest}{A symmetric data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - \activeset.} + active set.} \apiargument{IN}{source}{A symmetric data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the \activeset{}, ordered according to + elements of data for each \ac{PE} in the active set, ordered according to destination \ac{PE}.} \apiargument{IN}{nelems}{The number of elements to exchange for each \ac{PE}. - \VAR{nelems} must be of type size\_t for \CorCpp. If you are using + \VAR{nelems} must be of type size\_t for \CorCpp. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of - \acp{PE}. \VAR{PE\_start} must be of type integer. 
If you are using \Fortran, +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of + \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between - consecutive \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of - type integer. If you are using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_size}{The number of \acp{PE} in the \activeset. - \VAR{PE\_size} must be of type integer. If you are using \Fortran, it must + consecutive \ac{PE} numbers in the active set. \VAR{logPE\_stride} must be of + type integer. When using \Fortran, it must be a default integer value.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set. + \VAR{PE\_size} must be of type integer. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must be - of type long and size \CONST{SHMEM\_ALLTOALL\_SYNC\_SIZE}. In \Fortran, - \VAR{pSync} must be of type integer and size - \CONST{SHMEM\_ALLTOALL\_SYNC\_SIZE}. If you are using \Fortran, it must be a - default integer value. Every element of this array must be initialized with - the value \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the - \activeset{} enter the routine.} +\apiargument{IN}{pSync}{ + A symmetric work array of size \CONST{SHMEM\_ALLTOALL\_SYNC\_SIZE}. + In \CorCpp, \VAR{pSync} must be an array of elements of type \CTYPE{long}. + In \Fortran, \VAR{pSync} must be an array of elements of default integer type. + Every element of this array must be initialized with the value + \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set + enter the routine.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_alltoall} routines are collective routines. 
Each \ac{PE} - in the \activeset{} exchanges \VAR{nelems} data elements of size + in the active set exchanges \VAR{nelems} data elements of size 32 bits (for \FUNC{shmem\_alltoall32}) or 64 bits (for \FUNC{shmem\_alltoall64}) with all other \acp{PE} in the set. The data being sent and received are stored in a contiguous symmetric data object. The total size of each \acp{PE} @@ -57,27 +57,31 @@ an element (32 bits or 64 bits) times \VAR{PE\_size}. The \VAR{source} object contains \VAR{PE\_size} blocks of data (the size of each block defined by \VAR{nelems}) and each block of data is sent to a different \ac{PE}. - \ac{PE} \VAR{i} sends the \VAR{j}th block of its \VAR{source} object to - \ac{PE} \VAR{j} and that block of data is placed in the \VAR{i}th block of + Given a \ac{PE} \VAR{i} that is the \kth PE in the active set and a \ac{PE} + \VAR{j} that is the \lth \ac{PE} in the active set, + \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to + the \kth block of the \VAR{dest} object of \ac{PE} \VAR{j}. As with all \openshmem collective routines, this routine assumes - that only \acp{PE} in the \activeset{} call the routine. If a \ac{PE} not - in the \activeset{} calls an \openshmem collective routine, undefined - behavior results. + that only \acp{PE} in the active set call the routine. If a \ac{PE} not + in the active set calls an \openshmem collective routine, + the behavior is undefined. The values of arguments \VAR{nelems}, \VAR{PE\_start}, \VAR{logPE\_stride}, - and \VAR{PE\_size} must be equal on all \acp{PE} in the \activeset. The same + and \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same \VAR{dest} and \VAR{source} data objects, and the same \VAR{pSync} work - array must be passed to all \acp{PE} in the \activeset. 
- - Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following - conditions must exist (synchronization via a barrier or some other method is - often needed to ensure this): The \VAR{pSync} array on all \acp{PE} in the - \activeset{} is not still in use from a prior call to a - \FUNC{shmem\_alltoall/s} routine. The \VAR{dest} data object on - all \acp{PE} in the \activeset{} is ready to accept the - \FUNC{shmem\_alltoall} data. + array must be passed to all \acp{PE} in the active set. + + Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, + the following conditions must be ensured: + \begin{itemize} + \item The \VAR{pSync} array on all \acp{PE} in the active set is not + still in use from a prior call to a \FUNC{shmem\_alltoall} routine. + \item The \VAR{dest} data object on all \acp{PE} in the active set is + ready to accept the \FUNC{shmem\_alltoall} data. + \end{itemize} + Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: Its \VAR{dest} symmetric data object is completely updated and @@ -101,16 +105,16 @@ This routine restores \VAR{pSync} to its original contents. Multiple calls to \openshmem\ routines that use the same \VAR{pSync} array do not require that \VAR{pSync} be reinitialized after the first call. - You must ensure the that the \VAR{pSync} array is not being updated by any - \ac{PE} in the \activeset{} while any of the \acp{PE} participates in + The user must ensure that the \VAR{pSync} array is not being updated by any + \ac{PE} in the active set while any of the \acp{PE} participates in processing of an \openshmem\ \FUNC{shmem\_alltoall} routine. 
Be careful to avoid these situations: If the \VAR{pSync} array is initialized at run time, some type of synchronization is needed to ensure that all \acp{PE} in the - \activeset{} have initialized \VAR{pSync} before any of them enter an + active set have initialized \VAR{pSync} before any of them enter an \openshmem\ routine called with the \VAR{pSync} synchronization array. A \VAR{pSync} array may be reused on a subsequent \openshmem\ \FUNC{shmem\_alltoall} routine only if none of the \acp{PE} in the - \activeset{} are still processing a prior \openshmem\ \FUNC{shmem\_alltoall} + active set are still processing a prior \openshmem\ \FUNC{shmem\_alltoall} routine call that used the same \VAR{pSync} array. In general, this can be ensured only by doing some type of synchronization. } diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index a23780ed9..0f2aa47da 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -1,90 +1,95 @@ \apisummary{ shmem\_alltoalls is a collective routine where each \ac{PE} exchanges a fixed amount of strided data with all other - \acp{PE} in the \activeset. + \acp{PE} in the active set. 
} \begin{apidefinition} \begin{Csynopsis} -void shmem_alltoalls32(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_alltoalls64(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_alltoalls32}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_alltoalls64}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} \begin{Fsynopsis} INTEGER pSync(SHMEM_ALLTOALLS_SYNC_SIZE) INTEGER dst, sst, PE_start, logPE_stride, PE_size INTEGER nelems -CALL SHMEM_ALLTOALLS32(dest, source, dst, sst, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_ALLTOALLS64(dest, source, dst, sst, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_ALLTOALLS32}@(dest, source, dst, sst, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_ALLTOALLS64}@(dest, source, dst, sst, nelems, PE_start, logPE_stride, PE_size, pSync) \end{Fsynopsis} \begin{apiarguments} \apiargument{OUT}{dest}{A symmetric data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - \activeset.} + active set.} \apiargument{IN}{source}{A symmetric data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the \activeset{}, ordered according to + elements of data for each \ac{PE} in the active set, ordered according to destination \ac{PE}.} \apiargument{IN}{dst}{The stride between consecutive elements of the \dest{} data object. The stride is scaled by the element size. A value of \CONST{1} indicates contiguous data. \VAR{dst} must be of type - \textit{ptrdiff\_t}. 
If you are using \Fortran, it must be a default integer + \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} \apiargument{IN}{sst}{The stride between consecutive elements of the \source{} data object. The stride is scaled by the element size. A value of \CONST{1} indicates contiguous data. \VAR{sst} must be - of type \textit{ptrdiff\_t}. If you are using \Fortran, it must be a + of type \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} \apiargument{IN}{nelems}{The number of elements to exchange for each \ac{PE}. - \VAR{nelems} must be of type size\_t for \CorCpp. If you are using + \VAR{nelems} must be of type size\_t for \CorCpp. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of - \acp{PE}. \VAR{PE\_start} must be of type integer. If you are using \Fortran, +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of + \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between - consecutive \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of - type integer. If you are using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_size}{The number of \acp{PE} in the \activeset. - \VAR{PE\_size} must be of type integer. If you are using \Fortran, it must + consecutive \ac{PE} numbers in the active set. \VAR{logPE\_stride} must be of + type integer. When using \Fortran, it must be a default integer value.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set. + \VAR{PE\_size} must be of type integer. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must be - of type long and size \CONST{SHMEM\_ALLTOALLS\_SYNC\_SIZE}. 
In \Fortran, - \VAR{pSync} must be of type integer and size - \CONST{SHMEM\_ALLTOALLS\_SYNC\_SIZE}. If you are using \Fortran, it must be a - default integer value. Every element of this array must be initialized with - the value \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the - \activeset{} enter the routine.} +\apiargument{IN}{pSync}{ + A symmetric work array of size \CONST{SHMEM\_ALLTOALLS\_SYNC\_SIZE}. + In \CorCpp, \VAR{pSync} must be an array of elements of type \CTYPE{long}. + In \Fortran, \VAR{pSync} must be an array of elements of default integer type. + Every element of this array must be initialized with the value + \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set + enter the routine.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_alltoalls} routines are collective routines. Each \ac{PE} - in the \activeset{} exchanges \VAR{nelems} strided data elements of size + in the active set exchanges \VAR{nelems} strided data elements of size 32 bits (for \FUNC{shmem\_alltoalls32}) or 64 bits (for \FUNC{shmem\_alltoalls64}) with all other \acp{PE} in the set. Both strides, \VAR{dst} and \VAR{sst}, must be greater - than or equal to \CONST{1}. The \VAR{sst}*\VAR{j}th block sent from \ac{PE} \VAR{i} to - \ac{PE} \VAR{j} is placed in the \VAR{dst}*\VAR{i}th block of the \VAR{dest} data object on + than or equal to \CONST{1}. + Given a \ac{PE} \VAR{i} that is the \kth PE in the active set and a \ac{PE} + \VAR{j} that is the \lth \ac{PE} in the active set, + \ac{PE} \VAR{i} sends the \VAR{sst}*\lth block of the \VAR{source} data object to + the \VAR{dst}*\kth block of the \VAR{dest} data object on \ac{PE} \VAR{j}. As with all \openshmem collective routines, these routines assume - that only \acp{PE} in the \activeset{} call the routine. If a \ac{PE} not - in the \activeset{} calls an \openshmem collective routine, undefined + that only \acp{PE} in the active set call the routine. 
If a \ac{PE} not + in the active set calls an \openshmem collective routine, undefined behavior results. The values of arguments \VAR{dst}, \VAR{sst}, \VAR{nelems}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be equal on all \acp{PE} in the - \activeset. The same \VAR{dest} and \VAR{source} data objects, and the same - \VAR{pSync} work array must be passed to all \acp{PE} in the \activeset. - - Before any \ac{PE} calls to a \FUNC{shmem\_alltoalls} routine, the following - conditions must exist (synchronization via a barrier or some other method is - often needed to ensure this): The \VAR{pSync} array on all \acp{PE} in the - \activeset{} is not still in use from a prior call to a - \FUNC{shmem\_alltoalls} routine. The \VAR{dest} data object on - all \acp{PE} in the \activeset{} is ready to accept the - \FUNC{shmem\_alltoalls} data. - + active set. The same \VAR{dest} and \VAR{source} data objects, and the same + \VAR{pSync} work array must be passed to all \acp{PE} in the active set. + + Before any \ac{PE} calls a \FUNC{shmem\_alltoalls} routine, + the following conditions must be ensured: + \begin{itemize} + \item The \VAR{pSync} array on all \acp{PE} in the active set is not + still in use from a prior call to a \FUNC{shmem\_alltoalls} routine. + \item The \VAR{dest} data object on all \acp{PE} in the active set is + ready to accept the \FUNC{shmem\_alltoalls} data. + \end{itemize} + Otherwise, the behavior is undefined. + Upon return from a \FUNC{shmem\_alltoalls} routine, the following is true for the local PE: Its \VAR{dest} symmetric data object is completely updated and the data has been copied out of the \VAR{source} data object. @@ -107,16 +112,16 @@ This routine restores \VAR{pSync} to its original contents. Multiple calls to \openshmem\ routines that use the same \VAR{pSync} array do not require that \VAR{pSync} be reinitialized after the first call. 
- You must ensure the that the \VAR{pSync} array is not being updated by any - \ac{PE} in the \activeset{} while any of the \acp{PE} participates in + The user must ensure that the \VAR{pSync} array is not being updated by any + \ac{PE} in the active set while any of the \acp{PE} participates in processing of an \openshmem\ \FUNC{shmem\_alltoalls} routine. Be careful to avoid these situations: If the \VAR{pSync} array is initialized at run time, some type of synchronization is needed to ensure that all \acp{PE} in the - \activeset{} have initialized \VAR{pSync} before any of them enter an + active set have initialized \VAR{pSync} before any of them enter an \openshmem\ routine called with the \VAR{pSync} synchronization array. A \VAR{pSync} array may be reused on a subsequent \openshmem\ \FUNC{shmem\_alltoalls} routine only if none of the \acp{PE} in the - \activeset{} are still processing a prior \openshmem\ \FUNC{shmem\_alltoalls} + active set are still processing a prior \openshmem\ \FUNC{shmem\_alltoalls} routine call that used the same \VAR{pSync} array. In general, this can be ensured only by doing some type of synchronization. } diff --git a/content/shmem_atomic_add.tex b/content/shmem_atomic_add.tex new file mode 100644 index 000000000..111c2111a --- /dev/null +++ b/content/shmem_atomic_add.tex @@ -0,0 +1,87 @@ +\apisummary{ + Performs an atomic add operation on a remote symmetric data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_add}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_add}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}. 
+ +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_add}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_add}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +void @\FuncDecl{shmem\_add}@(TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_add}@(TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\} +and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. +\end{DeprecateBlock} + +\begin{Fsynopsis} +INTEGER pe +INTEGER*4 value_i4 +CALL @\FuncDecl{SHMEM\_INT4\_ADD}@(dest, value_i4, pe) +INTEGER*8 value_i8 +CALL @\FuncDecl{SHMEM\_INT8\_ADD}@(dest, value_i8, pe) +\end{Fsynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{The remotely accessible integer data object to be + updated on the remote \ac{PE}. When using \CorCpp, the type of + \dest{} should match that implied in the SYNOPSIS section.} + \apiargument{IN}{value}{The value to be atomically added to \dest. When using \CorCpp, the type of \VAR{value} should match that implied in + the SYNOPSIS section. When using \Fortran, it must be of type + integer with an element size of \dest.} + \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which + \dest{} is to be updated. 
When using \Fortran, it must be a default + integer value.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_atomic\_add} routine performs an atomic add operation. It adds + \VAR{value} to \dest{} on \ac{PE} \VAR{pe} and atomically updates the \dest{} + without returning the value. + } + +\apidesctable{ + When using \Fortran, \VAR{dest} and \VAR{value} must be of the following type: +}{Routine}{Data type of \VAR{dest} and \VAR{value}} + +\apitablerow{SHMEM\_INT4\_ADD}{\CONST{4}-byte integer} +\apitablerow{SHMEM\_INT8\_ADD}{\CONST{8}-byte integer} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\begin{apiexamples} + +\apicexample + {} + {./example_code/shmem_atomic_add_example.c} + {} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_atomic_and.tex b/content/shmem_atomic_and.tex new file mode 100644 index 000000000..d356b5d6b --- /dev/null +++ b/content/shmem_atomic_and.tex @@ -0,0 +1,49 @@ +\apisummary{ + Atomically perform a non-fetching bitwise AND operation on a + remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_and}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_and}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_and}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_and}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise AND operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_and} atomically performs a non-fetching bitwise AND + on the remotely accessible data object pointed to by \VAR{dest} at PE + \VAR{pe} with the operand \VAR{value}. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_cswap.tex b/content/shmem_atomic_compare_swap.tex similarity index 53% rename from content/shmem_cswap.tex rename to content/shmem_atomic_compare_swap.tex index 1b9a8886d..c38f5700f 100644 --- a/content/shmem_cswap.tex +++ b/content/shmem_atomic_compare_swap.tex @@ -5,48 +5,67 @@ \begin{apidefinition} \begin{C11synopsis} -TYPE shmem_cswap(TYPE *dest, TYPE cond, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_compare\_swap}@(TYPE *dest, TYPE cond, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_compare\_swap}@(shmem_ctx_t ctx, TYPE *dest, TYPE cond, TYPE value, int pe); \end{C11synopsis} -where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}. \begin{Csynopsis} -TYPE shmem__cswap(TYPE *dest, TYPE cond, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_compare\_swap}@(TYPE *dest, TYPE cond, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_compare\_swap}@(shmem_ctx_t ctx, TYPE *dest, TYPE cond, TYPE value, int pe); \end{Csynopsis} -where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. 
+where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_cswap}@(TYPE *dest, TYPE cond, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_cswap}@(TYPE *dest, TYPE cond, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\} +and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. +\end{DeprecateBlock} \begin{Fsynopsis} INTEGER pe INTEGER*4 SHMEM_INT4_CSWAP, cond_i4, value_i4, ires_i4 -ires_i4 = SHMEM_INT4_CSWAP(dest, cond_i4, value_i4, pe) +ires\_i4 = @\FuncDecl{SHMEM\_INT4\_CSWAP}@(dest, cond_i4, value_i4, pe) INTEGER*8 SHMEM_INT8_CSWAP, cond_i8, value_i8, ires_i8 -ires_i8 = SHMEM_INT8_CSWAP(dest, cond_i8, value_i8, pe) +ires\_i8 = @\FuncDecl{SHMEM\_INT8\_CSWAP}@(dest, cond_i8, value_i8, pe) \end{Fsynopsis} \begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \apiargument{OUT}{dest}{The remotely accessible integer data object to be updated on the remote \ac{PE}. } \apiargument{IN}{cond}{\VAR{cond} is compared to the remote \VAR{dest} value. If \VAR{cond} and the remote \VAR{dest} are equal, then \VAR{value} - is swapped into the remote \VAR{dest}. Otherwise, the remote \VAR{dest} is + is swapped into the remote \VAR{dest}; otherwise, the remote \VAR{dest} is unchanged. In either case, the old value of the remote \VAR{dest} is returned as the routine return value. \VAR{cond} must be of the same data type as \VAR{dest}.} \apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}. 
\VAR{value} must be the same data type as \VAR{dest}.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number upon which - \VAR{dest} is to be updated. If you are using \Fortran, it must be a default + \VAR{dest} is to be updated. When using \Fortran, it must be a default integer value.} \end{apiarguments} -\apidescription{ +\apidescription{ The conditional swap routines conditionally update a \VAR{dest} data object on the specified \ac{PE} and return the prior contents of the data object in one - atomic operation. + atomic operation. } \apidesctable{ - The \VAR{dest} and \VAR{value} data objects must conform to certain typing - constraints, which are as follows: -}{Routine}{Data type of \VAR{dest} and \VAR{value}} + When using \Fortran, \VAR{dest}, \VAR{cond}, and \VAR{value} must be of the following type: +}{Routine}{Data type of \VAR{dest}, \VAR{cond}, and \VAR{value}} \apitablerow{SHMEM\_INT4\_CSWAP}{\CONST{4}-byte integer.} \apitablerow{SHMEM\_INT8\_CSWAP}{\CONST{8}-byte integer.} @@ -68,7 +87,7 @@ {The following call ensures that the first \ac{PE} to execute the conditional swap will successfully write its \ac{PE} number to \VAR{race\_winner} on \ac{PE} \CONST{0}.} - {./example_code/shmem_cswap_example.c} + {./example_code/shmem_atomic_compare_swap_example.c} {} \end{apiexamples} diff --git a/content/shmem_atomic_fetch.tex b/content/shmem_atomic_fetch.tex new file mode 100644 index 000000000..146865e55 --- /dev/null +++ b/content/shmem_atomic_fetch.tex @@ -0,0 +1,75 @@ +\apisummary{ + Atomically fetches the value of a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_atomic\_fetch}@(const TYPE *source, int pe); +TYPE @\FuncDecl{shmem\_atomic\_fetch}@(shmem_ctx_t ctx, const TYPE *source, int pe); +\end{C11synopsis} +where \TYPE{} is one of the extended \ac{AMO} types specified by +Table~\ref{extamotypes}. 
+ +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch}@(const TYPE *source, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch}@(shmem_ctx_t ctx, const TYPE *source, int pe); +\end{Csynopsis} +where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{extamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_fetch}@(const TYPE *source, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{float}, \CTYPE{double}, \CTYPE{int}, +\CTYPE{long}, \CTYPE{long long}\}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_fetch}@(const TYPE *source, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{float}, \CTYPE{double}, \CTYPE{int}, +\CTYPE{long}, \CTYPE{long long}\} and has a corresponding +\TYPENAME{} specified by Table~\ref{extamotypes}. +\end{DeprecateBlock} + +\begin{Fsynopsis} +INTEGER pe +INTEGER*4 SHMEM_INT4_FETCH, ires_i4 +ires\_i4 = @\FuncDecl{SHMEM\_INT4\_FETCH}@(source, pe) +INTEGER*8 SHMEM_INT8_FETCH, ires_i8 +ires\_i8 = @\FuncDecl{SHMEM\_INT8\_FETCH}@(source, pe) +REAL*4 SHMEM_REAL4_FETCH, res_r4 +res\_r4 = @\FuncDecl{SHMEM\_REAL4\_FETCH}@(source, pe) +REAL*8 SHMEM_REAL8_FETCH, res_r8 +res\_r8 = @\FuncDecl{SHMEM\_REAL8\_FETCH}@(source, pe) +\end{Fsynopsis} + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{source}{The remotely accessible data object to be fetched from + the remote \ac{PE}.} + \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number from which + \VAR{source} is to be fetched.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_fetch} performs an atomic fetch operation. + It returns the contents of the \VAR{source} as an atomic operation. 
+} + +\apireturnvalues{ + The contents at the \VAR{source} address on the remote \ac{PE}. + The data type of the return value is the same as the type of + the remote data object. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_fadd.tex b/content/shmem_atomic_fetch_add.tex similarity index 50% rename from content/shmem_fadd.tex rename to content/shmem_atomic_fetch_add.tex index 4487ff37e..5c9da474c 100644 --- a/content/shmem_fadd.tex +++ b/content/shmem_atomic_fetch_add.tex @@ -5,38 +5,58 @@ \begin{apidefinition} \begin{C11synopsis} -TYPE shmem_fadd(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_fetch\_add}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_fetch\_add}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); \end{C11synopsis} -where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}. \begin{Csynopsis} -TYPE shmem__fadd(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_add}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_add}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); \end{Csynopsis} -where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. +where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_fadd}@(TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\}. 
+ +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_fadd}@(TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\} +and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. +\end{DeprecateBlock} \begin{Fsynopsis} INTEGER pe INTEGER*4 SHMEM_INT4_FADD, ires_i4, value_i4 -ires_i4 = SHMEM_INT4_FADD(dest, value_i4, pe) +ires\_i4 = @\FuncDecl{SHMEM\_INT4\_FADD}@(dest, value_i4, pe) INTEGER*8 SHMEM_INT8_FADD, ires_i8, value_i8 -ires_i8 = SHMEM_INT8_FADD(dest, value_i8, pe) +ires\_i8 = @\FuncDecl{SHMEM\_INT8\_FADD}@(dest, value_i8, pe) \end{Fsynopsis} \begin{apiarguments} +\apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \apiargument{OUT}{dest}{The remotely accessible integer data object to be updated on the remote \ac{PE}. The type of \VAR{dest} should match that implied in the SYNOPSIS section.} \apiargument{IN}{value}{The value to be atomically added to \VAR{dest}. The type of \VAR{value} should match that implied in the SYNOPSIS section.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \VAR{dest} is to be updated. If you are using \Fortran, it must be a default + \VAR{dest} is to be updated. When using \Fortran, it must be a default integer value.} \end{apiarguments} \apidescription{ - \FUNC{shmem\_fadd} routines perform an atomic fetch-and-add operation. An + \FUNC{shmem\_atomic\_fetch\_add} routines perform an atomic fetch-and-add operation. An atomic fetch-and-add operation fetches the old \VAR{dest} and adds \VAR{value} to \VAR{dest} without the possibility of another atomic operation on the \VAR{dest} between the time of the fetch and the update. 
These routines add @@ -45,8 +65,8 @@ } \apidesctable{ - If you are using \Fortran, \VAR{dest} must be of the following type: -}{Routine}{Data type of \VAR{dest} and \VAR{source}} + When using \Fortran, \VAR{dest} and \VAR{value} must be of the following type: +}{Routine}{Data type of \VAR{dest} and \VAR{value}} \apitablerow{SHMEM\_INT4\_FADD}{\CONST{4}-byte integer} \apitablerow{SHMEM\_INT8\_FADD}{\CONST{8}-byte integer} @@ -65,8 +85,9 @@ \begin{apiexamples} \apicexample - {The following \FUNC{shmem\_fadd} example is for \CorCpp{} programs:} - {./example_code/shmem_fadd_example.c} + {The following \FUNC{shmem\_atomic\_fetch\_add} example is for + \Cstd[11] programs:} + {./example_code/shmem_atomic_fetch_add_example.c} {} \end{apiexamples} diff --git a/content/shmem_atomic_fetch_and.tex b/content/shmem_atomic_fetch_and.tex new file mode 100644 index 000000000..63b53b27e --- /dev/null +++ b/content/shmem_atomic_fetch_and.tex @@ -0,0 +1,49 @@ +\apisummary{ + Atomically perform a fetching bitwise AND operation on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_atomic\_fetch\_and}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_fetch\_and}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_and}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_and}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise AND operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_fetch\_and} atomically performs a fetching bitwise AND + on the remotely accessible data object pointed to by \VAR{dest} at PE + \VAR{pe} with the operand \VAR{value}. +} + +\apireturnvalues{ + The value pointed to by \VAR{dest} on PE \VAR{pe} immediately before the + operation is performed. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_atomic_fetch_inc.tex b/content/shmem_atomic_fetch_inc.tex new file mode 100644 index 000000000..66c154e3c --- /dev/null +++ b/content/shmem_atomic_fetch_inc.tex @@ -0,0 +1,90 @@ +\apisummary{ + Performs an atomic fetch-and-increment operation on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_atomic\_fetch\_inc}@(TYPE *dest, int pe); +TYPE @\FuncDecl{shmem\_atomic\_fetch\_inc}@(shmem_ctx_t ctx, TYPE *dest, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_inc}@(TYPE *dest, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_inc}@(shmem_ctx_t ctx, TYPE *dest, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_finc}@(TYPE *dest, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\}. 
+ +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_finc}@(TYPE *dest, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\} +and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. +\end{DeprecateBlock} + +\begin{Fsynopsis} +INTEGER pe +INTEGER*4 SHMEM_INT4_FINC, ires_i4 +ires\_i4 = @\FuncDecl{SHMEM\_INT4\_FINC}@(dest, pe) +INTEGER*8 SHMEM_INT8_FINC, ires_i8 +ires\_i8 = @\FuncDecl{SHMEM\_INT8\_FINC}@(dest, pe) +\end{Fsynopsis} + + +\begin{apiarguments} + +\apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} +\apiargument{OUT}{dest}{The remotely accessible integer data object to be updated + on the remote \ac{PE}. The type of \dest{} should match that implied in the + SYNOPSIS section.} +\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which + \dest{} is to be updated. When using \Fortran, it must be a default + integer value.} + +\end{apiarguments} + + +\apidescription{ + These routines perform a fetch-and-increment operation. The \dest{} on + \ac{PE} \VAR{pe} is increased by one and the routine returns the previous + contents of \dest{} as an atomic operation. +} + +\apidesctable{ + When using \Fortran, \VAR{dest} must be of the following type: +}{Routine}{Data type of \VAR{dest}} + +\apitablerow{SHMEM\_INT4\_FINC}{\CONST{4}-byte integer} +\apitablerow{SHMEM\_INT8\_FINC}{\CONST{8}-byte integer} + +\apireturnvalues{ + The contents that had been at the \dest{} address on the remote \ac{PE} prior to + the increment. The data type of the return value is the same as the \dest. +} + +\apinotes{ + None. 
+} + +\begin{apiexamples} + +\apicexample + {The following \FUNC{shmem\_atomic\_fetch\_inc} example is for + \Cstd[11] programs:} + {./example_code/shmem_atomic_fetch_inc_example.c} + {} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_atomic_fetch_or.tex b/content/shmem_atomic_fetch_or.tex new file mode 100644 index 000000000..23892b555 --- /dev/null +++ b/content/shmem_atomic_fetch_or.tex @@ -0,0 +1,49 @@ +\apisummary{ + Atomically perform a fetching bitwise OR operation on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_atomic\_fetch\_or}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_fetch\_or}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_or}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_or}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise OR operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_fetch\_or} atomically performs a fetching bitwise OR + on the remotely accessible data object pointed to by \VAR{dest} at PE + \VAR{pe} with the operand \VAR{value}. 
+} + +\apireturnvalues{ + The value pointed to by \VAR{dest} on PE \VAR{pe} immediately before the + operation is performed. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_atomic_fetch_xor.tex b/content/shmem_atomic_fetch_xor.tex new file mode 100644 index 000000000..1a36b4bb8 --- /dev/null +++ b/content/shmem_atomic_fetch_xor.tex @@ -0,0 +1,50 @@ +\apisummary{ + Atomically perform a fetching bitwise exclusive OR (XOR) operation on a + remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_atomic\_fetch\_xor}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_fetch\_xor}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_fetch\_xor}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_fetch\_xor}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise XOR operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_fetch\_xor} atomically performs a fetching bitwise XOR + on the remotely accessible data object pointed to by \VAR{dest} at PE + \VAR{pe} with the operand \VAR{value}. 
+} + +\apireturnvalues{ + The value pointed to by \VAR{dest} on PE \VAR{pe} immediately before the + operation is performed. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_atomic_inc.tex b/content/shmem_atomic_inc.tex new file mode 100644 index 000000000..ff78a3094 --- /dev/null +++ b/content/shmem_atomic_inc.tex @@ -0,0 +1,85 @@ +\apisummary{ + Performs an atomic increment operation on a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_inc}@(TYPE *dest, int pe); +void @\FuncDecl{shmem\_atomic\_inc}@(shmem_ctx_t ctx, TYPE *dest, int pe); +\end{C11synopsis} +where \TYPE{} is one of the standard \ac{AMO} types specified by +Table~\ref{stdamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_inc}@(TYPE *dest, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_inc}@(shmem_ctx_t ctx, TYPE *dest, int pe); +\end{Csynopsis} +where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{stdamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +void @\FuncDecl{shmem\_inc}@(TYPE *dest, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_inc}@(TYPE *dest, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{int}, \CTYPE{long}, \CTYPE{long long}\} +and has a corresponding \TYPENAME{} specified by Table~\ref{stdamotypes}. +\end{DeprecateBlock} + +\begin{Fsynopsis} +INTEGER pe +CALL @\FuncDecl{SHMEM\_INT4\_INC}@(dest, pe) +CALL @\FuncDecl{SHMEM\_INT8\_INC}@(dest, pe) +\end{Fsynopsis} + +\begin{apiarguments} + +\apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} +\apiargument{OUT}{dest}{The remotely accessible integer data object to be updated + on the remote \ac{PE}. The type of \dest{} should match that implied in the + SYNOPSIS section.} +\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which + \dest{} is to be updated. When using \Fortran, it must be a default + integer value.} + +\end{apiarguments} + +\apidescription{ + These routines perform an atomic increment operation on the \VAR{dest} data + object on \ac{PE} \VAR{pe}. +} + + +\apidesctable{ + When using \Fortran, \VAR{dest} must be of the following type: +}{Routine}{Data type of \VAR{dest}} + +\apitablerow{SHMEM\_INT4\_INC}{\CONST{4}-byte integer} +\apitablerow{SHMEM\_INT8\_INC}{\CONST{8}-byte integer} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\begin{apiexamples} + +\apicexample + { The following \FUNC{shmem\_atomic\_inc} example is for + \Cstd[11] programs: } + {./example_code/shmem_atomic_inc_example.c} + {} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_atomic_or.tex b/content/shmem_atomic_or.tex new file mode 100644 index 000000000..b53c6bd9f --- /dev/null +++ b/content/shmem_atomic_or.tex @@ -0,0 +1,49 @@ +\apisummary{ + Atomically perform a non-fetching bitwise OR operation on a + remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_or}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_or}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. 
+ +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_or}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_or}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise OR operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_or} atomically performs a non-fetching bitwise OR + on the remotely accessible data object pointed to by \VAR{dest} at PE + \VAR{pe} with the operand \VAR{value}. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_atomic_set.tex b/content/shmem_atomic_set.tex new file mode 100644 index 000000000..69bc14172 --- /dev/null +++ b/content/shmem_atomic_set.tex @@ -0,0 +1,74 @@ +\apisummary{ + Atomically sets the value of a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_set}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_set}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the extended \ac{AMO} types specified by +Table~\ref{extamotypes}. 
+ +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_set}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_set}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{extamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +void @\FuncDecl{shmem\_set}@(TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{float}, \CTYPE{double}, \CTYPE{int}, +\CTYPE{long}, \CTYPE{long long}\}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_set}@(TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{float}, \CTYPE{double}, \CTYPE{int}, +\CTYPE{long}, \CTYPE{long long}\} and has a corresponding +\TYPENAME{} specified by Table~\ref{extamotypes}. +\end{DeprecateBlock} + +\begin{Fsynopsis} +INTEGER pe +INTEGER*4 SHMEM_INT4_SET, value_i4 +CALL @\FuncDecl{SHMEM\_INT4\_SET}@(dest, value_i4, pe) +INTEGER*8 SHMEM_INT8_SET, value_i8 +CALL @\FuncDecl{SHMEM\_INT8\_SET}@(dest, value_i8, pe) +REAL*4 SHMEM_REAL4_SET, value_r4 +CALL @\FuncDecl{SHMEM\_REAL4\_SET}@(dest, value_r4, pe) +REAL*8 SHMEM_REAL8_SET, value_r8 +CALL @\FuncDecl{SHMEM\_REAL8\_SET}@(dest, value_r8, pe) +\end{Fsynopsis} + +\begin{apiarguments} + +\apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} +\apiargument{OUT}{dest}{The remotely accessible data object to be set on + the remote \ac{PE}.} +\apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}.} +\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which + \VAR{dest} is to be updated.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_set} performs an atomic set operation. 
It writes the + \VAR{value} into \VAR{dest} on \VAR{pe} as an atomic operation. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_atomic_swap.tex b/content/shmem_atomic_swap.tex new file mode 100644 index 000000000..e8a1713ae --- /dev/null +++ b/content/shmem_atomic_swap.tex @@ -0,0 +1,96 @@ +\apisummary{ + Performs an atomic swap to a remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_atomic\_swap}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_atomic\_swap}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the extended \ac{AMO} types specified by Table~\ref{extamotypes}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_swap}@(TYPE *dest, TYPE value, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_swap}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table~\ref{extamotypes}. + +\begin{DeprecateBlock} +\begin{C11synopsis} +TYPE @\FuncDecl{shmem\_swap}@(TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of \{\CTYPE{float}, \CTYPE{double}, \CTYPE{int}, +\CTYPE{long}, \CTYPE{long long}\}. + +\begin{Csynopsis} +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_swap}@(TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of \{\CTYPE{float}, \CTYPE{double}, \CTYPE{int}, +\CTYPE{long}, \CTYPE{long long}\} and has a corresponding +\TYPENAME{} specified by Table~\ref{extamotypes}. 
+\end{DeprecateBlock} + +\begin{Fsynopsis} +INTEGER SHMEM_SWAP, value, pe +ires = @\FuncDecl{SHMEM\_SWAP}@(dest, value, pe) +INTEGER*4 SHMEM_INT4_SWAP, value_i4, ires_i4 +ires\_i4 = @\FuncDecl{SHMEM\_INT4\_SWAP}@(dest, value_i4, pe) +INTEGER*8 SHMEM_INT8_SWAP, value_i8, ires_i8 +ires\_i8 = @\FuncDecl{SHMEM\_INT8\_SWAP}@(dest, value_i8, pe) +REAL*4 SHMEM_REAL4_SWAP, value_r4, res_r4 +res\_r4 = @\FuncDecl{SHMEM\_REAL4\_SWAP}@(dest, value_r4, pe) +REAL*8 SHMEM_REAL8_SWAP, value_r8, res_r8 +res\_r8 = @\FuncDecl{SHMEM\_REAL8\_SWAP}@(dest, value_r8, pe) +\end{Fsynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{The remotely accessible integer data object to be + updated on the remote \ac{PE}. When using \CorCpp, the type of + \dest{} should match that implied in the SYNOPSIS section.} + \apiargument{IN}{value}{The value to be atomically written to the remote + \ac{PE}. \VAR{value} is the same type as \dest.} + \apiargument{IN}{pe}{ An integer that indicates the \ac{PE} number on which + \dest{} is to be updated. When using \Fortran, it must be a default + integer value.} +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_swap} performs an atomic swap operation. + It writes \VAR{value} into \dest{} on \ac{PE} \VAR{pe} and returns the previous + contents of \dest{} as an atomic operation. 
+} + +\apidesctable{ + When using \Fortran, \VAR{dest} and \VAR{value} must be of the following type: +}{Routine}{Data type of \VAR{dest} and \VAR{value}} + +\apitablerow{SHMEM\_SWAP}{Integer of default kind} +\apitablerow{SHMEM\_INT4\_SWAP}{\CONST{4}-byte integer} +\apitablerow{SHMEM\_INT8\_SWAP}{\CONST{8}-byte integer} +\apitablerow{SHMEM\_REAL4\_SWAP}{\CONST{4}-byte real} +\apitablerow{SHMEM\_REAL8\_SWAP}{\CONST{8}-byte real} + +\apireturnvalues{ + The content that had been at the \dest{} address on the remote \ac{PE} + prior to the swap is returned. +} + +\apinotes{ + None. +} + +\begin{apiexamples} + +\apicexample + {The example below swaps values between odd numbered \acp{PE} and + their right (modulo) neighbor and outputs the result of swap.} + {./example_code/shmem_atomic_swap_example.c} + {} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_atomic_xor.tex b/content/shmem_atomic_xor.tex new file mode 100644 index 000000000..d5aefba3c --- /dev/null +++ b/content/shmem_atomic_xor.tex @@ -0,0 +1,49 @@ +\apisummary{ + Atomically perform a non-fetching bitwise exclusive OR (XOR) operation on a + remote data object. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_atomic\_xor}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_atomic\_xor}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{C11synopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types specified by +Table~\ref{bitamotypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_atomic\_xor}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_atomic\_xor}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); +\end{Csynopsis} +where \TYPE{} is one of the bitwise \ac{AMO} types and has a corresponding +\TYPENAME{} specified by Table~\ref{bitamotypes}. + +\begin{apiarguments} + + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{A pointer to the remotely accessible data object to + be updated.} + \apiargument{IN}{value}{The operand to the bitwise XOR operation.} + \apiargument{IN}{pe}{An integer value for the \ac{PE} on which \VAR{dest} + is to be updated.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_atomic\_xor} atomically performs a non-fetching bitwise XOR + on the remotely accessible data object pointed to by \VAR{dest} at PE + \VAR{pe} with the operand \VAR{value}. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + None. +} + +\end{apidefinition} diff --git a/content/shmem_barrier.tex b/content/shmem_barrier.tex index 5c088ddb8..c5f7adea5 100644 --- a/content/shmem_barrier.tex +++ b/content/shmem_barrier.tex @@ -1,60 +1,61 @@ \apisummary{ Performs all operations described in the \FUNC{shmem\_barrier\_all} interface - but with respect to a subset of \acp{PE} defined by the \activeset. + but with respect to a subset of \acp{PE} defined by the active set. } \begin{apidefinition} \begin{Csynopsis} -void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_barrier}@(int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} \begin{Fsynopsis} INTEGER PE_start, logPE_stride, PE_size INTEGER pSync(SHMEM_BARRIER_SYNC_SIZE) -CALL SHMEM_BARRIER(PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_BARRIER}@(PE_start, logPE_stride, PE_size, pSync) \end{Fsynopsis} \begin{apiarguments} -\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset of \acp{PE}. - \VAR{PE\_start} must be of type integer. If you are using \Fortran, it must be +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}. + \VAR{PE\_start} must be of type integer. 
When using \Fortran, it must be a default integer value.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive - \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of type integer. - If you are using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_size}{The number of \acp{PE} in the \activeset. \VAR{PE\_size} - must be of type integer. If you are using \Fortran, it must be a default + \ac{PE} numbers in the active set. \VAR{logPE\_stride} must be of type integer. + When using \Fortran, it must be a default integer value.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set. \VAR{PE\_size} + must be of type integer. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must - be of type long and size \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}. In \Fortran, - \VAR{pSync} must be of type integer and size \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}. - If you are using \Fortran, it must be a default integer type. Every element +\apiargument{IN}{pSync}{ + A symmetric work array of size \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}. + In \CorCpp, \VAR{pSync} must be an array of elements of type \CTYPE{long}. + In \Fortran, \VAR{pSync} must be an array of elements of default integer type. + Every element of this array must be initialized to \CONST{SHMEM\_SYNC\_VALUE} before any of - the \acp{PE} in the \activeset enter \FUNC{shmem\_barrier} the first time.} + the \acp{PE} in the active set enter \FUNC{shmem\_barrier} the first time.} \end{apiarguments} \apidescription{ \FUNC{shmem\_barrier} is a collective synchronization routine over an - \activeset. Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in - the \activeset (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and + active set. 
Control returns from \FUNC{shmem\_barrier} after all \acp{PE} in + the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size}) have called \FUNC{shmem\_barrier}. As with all \openshmem collective routines, each of these routines assumes that - only \acp{PE} in the \activeset call the routine. If a \ac{PE} not in the - \activeset calls an \openshmem collective routine, undefined behavior results. + only \acp{PE} in the active set call the routine. If a \ac{PE} not in the + active set calls an \openshmem collective routine, the behavior is undefined. The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} - must be equal on all \acp{PE} in the \activeset. The same work array must be - passed in \VAR{pSync} to all \acp{PE} in the \activeset. + must be the same value on all \acp{PE} in the active set. The same work array must be + passed in \VAR{pSync} to all \acp{PE} in the active set. \FUNC{shmem\_barrier} ensures that all previously issued stores and remote memory updates, including \acp{AMO} and \ac{RMA} operations, done by any of the - \acp{PE} in the \activeset are complete before returning. + \acp{PE} in the active set on the default context are complete before returning. The same \VAR{pSync} array may be reused on consecutive calls to - \FUNC{shmem\_barrier} if the same active \ac{PE} set is used. + \FUNC{shmem\_barrier} if the same active set is used. } \apireturnvalues{ @@ -62,19 +63,28 @@ } \apinotes{ - If the \VAR{pSync} array is initialized at run time, be sure to use some type of - synchronization, for example, a call to \FUNC{shmem\_barrier\_all}, before - calling \FUNC{shmem\_barrier} for the first time. + If the \VAR{pSync} array is initialized at run time, all + \acp{PE} must be synchronized before the first call to \FUNC{shmem\_barrier} + (e.g., by \FUNC{shmem\_barrier\_all}) to ensure the array has been initialized + by all \acp{PE} before it is used.
- If the \activeset does not change, \FUNC{shmem\_barrier} can be called + If the active set does not change, \FUNC{shmem\_barrier} can be called repeatedly with the same \VAR{pSync} array. No additional synchronization beyond that implied by \FUNC{shmem\_barrier} itself is necessary in this case. + + The \FUNC{shmem\_barrier} routine can be used to + portably ensure that memory access operations observe remote updates in the order + enforced by initiator \acp{PE}. + + Calls to \FUNC{shmem\_ctx\_quiet} can be performed prior + to calling the barrier routine to ensure completion of operations issued on + additional contexts. } \begin{apiexamples} \apicexample - {The following barrier example is for \CorCpp programs:} + {The following barrier example is for \Cstd[11] programs:} {./example_code/shmem_barrier_example.c} {} diff --git a/content/shmem_barrier_all.tex b/content/shmem_barrier_all.tex index c1bf5c0a4..8392054bf 100644 --- a/content/shmem_barrier_all.tex +++ b/content/shmem_barrier_all.tex @@ -1,17 +1,17 @@ \apisummary{ - Registers the arrival of a \ac{PE} at a barrier and suspends \ac{PE} execution - until all other \ac{PE}s arrive at the barrier and all local and remote memory - updates are completed. + Registers the arrival of a \ac{PE} at a barrier and blocks the \ac{PE} + until all other \acp{PE} arrive at the barrier and all local + updates and remote memory updates on the default context are completed. } \begin{apidefinition} \begin{Csynopsis} -void shmem_barrier_all(void); +void @\FuncDecl{shmem\_barrier\_all}@(void); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_BARRIER_ALL +CALL @\FuncDecl{SHMEM\_BARRIER\_ALL}@ \end{Fsynopsis} \begin{apiarguments} @@ -22,14 +22,15 @@ \apidescription{ The \FUNC{shmem\_barrier\_all} routine registers the arrival of a \ac{PE} at - a barrier. Barriers are a fast mechanism for synchronizing all \ac{PE}s at - once. 
This routine causes a \ac{PE} to suspend execution until all \ac{PE}s - have called \FUNC{shmem\_barrier\_all}. This routine must be used with - \ac{PE}s started by \FUNC{shmem\_init}. + a barrier. Barriers are a mechanism for synchronizing all \acp{PE} at + once. This routine blocks the \ac{PE} until all \acp{PE} have called + \FUNC{shmem\_barrier\_all}. In a multithreaded \openshmem + program, only the calling thread is blocked. - Prior to synchronizing with other \ac{PE}s, \FUNC{shmem\_barrier\_all} + Prior to synchronizing with other \acp{PE}, \FUNC{shmem\_barrier\_all} ensures completion of all previously issued memory stores and remote memory - updates issued via \openshmem{} \acp{AMO} and \ac{RMA} routine calls such + updates issued on the default context via \openshmem \acp{AMO} and + \ac{RMA} routine calls such as \FUNC{shmem\_int\_add}, \FUNC{shmem\_put32}, \FUNC{shmem\_put\_nbi}, and \FUNC{shmem\_get\_nbi}. } @@ -39,13 +40,19 @@ } \apinotes{ - None. + The \FUNC{shmem\_barrier\_all} routine can be used to + portably ensure that memory access operations observe remote updates in the order + enforced by initiator \acp{PE}. + + Calls to \FUNC{shmem\_ctx\_quiet} can be performed prior + to calling the barrier routine to ensure completion of operations issued on + additional contexts. } \begin{apiexamples} \apicexample - { The following \FUNC{shmem\_barrier\_all} example is for \CorCpp programs:} + { The following \FUNC{shmem\_barrier\_all} example is for \Cstd[11] programs:} {./example_code/shmem_barrierall_example.c} {} diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 74ff2bbb5..06a9bec5f 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -1,22 +1,22 @@ \apisummary{ Broadcasts a block of data from one \ac{PE} to one or more destination - \ac{PE}s. + \acp{PE}. 
} \begin{apidefinition} \begin{Csynopsis} -void shmem_broadcast32(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_broadcast64(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_broadcast32}@(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_broadcast64}@(void *dest, const void *source, size_t nelems, int PE_root, int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} \begin{Fsynopsis} INTEGER nelems, PE_root, PE_start, logPE_stride, PE_size INTEGER pSync(SHMEM_BCAST_SYNC_SIZE) -CALL SHMEM_BROADCAST4(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_BROADCAST8(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_BROADCAST32(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size,pSync) -CALL SHMEM_BROADCAST64(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size,pSync) +CALL @\FuncDecl{SHMEM\_BROADCAST4}@(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_BROADCAST8}@(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_BROADCAST32}@(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size,pSync) +CALL @\FuncDecl{SHMEM\_BROADCAST64}@(dest, source, nelems, PE_root, PE_start, logPE_stride, PE_size,pSync) \end{Fsynopsis} \begin{apiarguments} @@ -26,58 +26,65 @@ that is permissible for the \dest{} argument.} \apiargument{IN}{nelems}{The number of elements in \source. For \FUNC{shmem\_broadcast32} and \FUNC{shmem\_broadcast4}, this is the number of - 32-bit halfwords. nelems must be of type \VAR{size\_t} in \Clang. If you are + 32-bit halfwords. nelems must be of type \VAR{size\_t} in \Cstd. 
When using \Fortran, it must be a default integer value.} \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the \activeset, from which the data is copied. Must be greater than or equal to - 0 and less than \VAR{PE\_size}. \VAR{PE\_root} must be of type integer. If you - are using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of - \ac{PE}s. \VAR{PE\_start} must be of type integer. If you are using \Fortran, + the active set, from which the data is copied. Must be greater than or equal to + 0 and less than \VAR{PE\_size}. \VAR{PE\_root} must be of type integer. When using \Fortran, it must be a default integer value.} +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of + \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} \apiargument{IN}{logPE\_stride}{ The log (base 2) of the stride between - consecutive \ac{PE} numbers in the \activeset. \VAR{log\_PE\_stride} must be of - type integer. If you are using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_size}{ The number of \ac{PE}s in the \activeset. - \VAR{PE\_size} must be of type integer. If you are using \Fortran, it must be a + consecutive \ac{PE} numbers in the active set. \VAR{log\_PE\_stride} must be of + type integer. When using \Fortran, it must be a default integer value.} +\apiargument{IN}{PE\_size}{ The number of \acp{PE} in the active set. + \VAR{PE\_size} must be of type integer. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{pSync}{ A symmetric work array. In \CorCpp, \VAR{pSync} must - be of type long and size \CONST{SHMEM\_BCAST\_SYNC\_SIZE}. In \Fortran, - \VAR{pSync} must be of type integer and size \CONST{SHMEM\_BCAST\_SYNC\_SIZE}. +\apiargument{IN}{pSync}{ + A symmetric work array of size \CONST{SHMEM\_BCAST\_SYNC\_SIZE}. 
+ In \CorCpp, \VAR{pSync} must be an array of elements of type \CTYPE{long}. + In \Fortran, \VAR{pSync} must be an array of elements of default integer type. Every element of this array must be initialized with the value - \CONST{SHMEM\_SYNC\_VALUE} (in \CorCpp) or \CONST{SHMEM\_SYNC\_VALUE} (in - \Fortran) before any of the \ac{PE}s in the \activeset{} enter - \FUNC{shmem\_broadcast}.} + \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set + enters \FUNC{shmem\_broadcast}.} \end{apiarguments} \apidescription{ \openshmem broadcast routines are collective routines. They copy data object \source{} on the processor specified by \VAR{PE\_root} and store the values at - \dest{} on the other \ac{PE}s specified by the triplet \VAR{PE\_start}, + \dest{} on the other \acp{PE} specified by the triplet \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size}. The data is not copied to the \dest{} area on the root \ac{PE}. As with all \openshmem collective routines, each of these routines assumes that - only \ac{PE}s in the \activeset{} call the routine. If a \ac{PE} not in the - \activeset{} calls an \openshmem collective routine, undefined behavior results. + only \acp{PE} in the active set call the routine. If a \ac{PE} not in the + active set calls an \openshmem collective routine, the behavior is undefined. The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, - and \VAR{PE\_size} must be equal on all \ac{PE}s in the \activeset. The same + and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. The same \dest{} and \source{} data objects and the same \VAR{pSync} work array must be - passed to all \ac{PE}s in the \activeset. + passed by all \acp{PE} in the active set. 
- Before any \ac{PE} calls a broadcast routine, you must ensure that the following - conditions exist (synchronization via a barrier or some other method is often - needed to ensure this): The \VAR{pSync} array on all \ac{PE}s in the - \activeset{} is not still in use from a prior call to a broadcast routine. The - \dest{} array on all \ac{PE}s in the \activeset{} is ready to accept the - broadcast data. + Before any \ac{PE} calls a broadcast routine, + the following conditions must be ensured: + \begin{itemize} + \item The \VAR{pSync} array on all \acp{PE} in the active set is + not still in use from a prior call to a broadcast routine. + \item The \dest{} array on all \acp{PE} in the active set is ready + to accept the broadcast data. + \end{itemize} + Otherwise, the behavior is undefined. Upon return from a broadcast routine, the following are true for the local - \ac{PE}: If the current \ac{PE} is not the root \ac{PE}, the \dest{} data object - is updated. The values in the \VAR{pSync} array are restored to the original - values. + \ac{PE}: + \begin{itemize} + \item If the current \ac{PE} is not the root \ac{PE}, + the \dest{} data object is updated. + \item The \source{} data object may be safely reused. + \item The values in the \VAR{pSync} array are restored to the + original values. + \end{itemize} } \apidesctable{ @@ -86,10 +93,10 @@ }{Routine}{Data type of \VAR{dest} and \VAR{source}} \apitablerow{shmem\_broadcast8, shmem\_broadcast64}{Any noncharacter - type that has an element size of \CONST{64} bits. No \Fortran{} derived types or + type that has an element size of \CONST{64} bits. No \Fortran derived types or \CorCpp{} structures are allowed.} \apitablerow{shmem\_broadcast4, shmem\_broadcast32}{Any noncharacter - type that has an element size of \CONST{32} bits. No \Fortran{} + type that has an element size of \CONST{32} bits. 
No \Fortran derived types or \CorCpp{} structures are allowed.} \apireturnvalues{ @@ -101,16 +108,16 @@ Multiple calls to \openshmem routines that use the same \VAR{pSync} array do not require that \VAR{pSync} be reinitialized after the first call. - You must ensure the that the \VAR{pSync} array is not being updated by any - \ac{PE} in the \activeset{} while any of the \ac{PE}s participates in processing + The user must ensure that the \VAR{pSync} array is not being updated by any + \ac{PE} in the active set while any of the \acp{PE} participates in processing of an \openshmem broadcast routine. Be careful to avoid these situations: If the - \VAR{pSync} array is initialized at run time, some type of synchronization is - needed to ensure that all \ac{PE}s in the \activeset{} have initialized + \VAR{pSync} array is initialized at run time, before its first use, some type of synchronization is + needed to ensure that all \acp{PE} in the active set have initialized \VAR{pSync} before any of them enter an \openshmem routine called with the \VAR{pSync} synchronization array. A \VAR{pSync} array may be reused on a - subsequent \openshmem broadcast routine only if none of the \ac{PE}s in the - \activeset{} are still processing a prior \openshmem broadcast routine call that - used the same \VAR{pSync} array. In general, this can be ensured only by doing + subsequent \openshmem broadcast routine only if none of the \acp{PE} in the + active set are still processing a prior \openshmem broadcast routine call that + used the same \VAR{pSync} array. In general, this can be ensured only by doing some type of synchronization. } @@ -118,14 +125,14 @@ \apicexample {In the following examples, the call to \FUNC{shmem\_broadcast64} copies \source{} - on \ac{PE} 4 to \dest{} on \ac{PE}s 5, 6, and 7. + on \ac{PE} 4 to \dest{} on \acp{PE} 5, 6, and 7. 
\CorCpp{} example:} {./example_code/shmem_broadcast_example.c} {} \apifexample - {\Fortran{} example:} + {\Fortran example:} {./example_code/shmem_broadcast_example.f90} {} diff --git a/content/shmem_cache.tex b/content/shmem_cache.tex index 98c473620..8b8240319 100644 --- a/content/shmem_cache.tex +++ b/content/shmem_cache.tex @@ -4,27 +4,31 @@ \begin{apidefinition} +\begin{DeprecateBlock} \begin{Csynopsis} -void shmem_clear_cache_inv(void); -void shmem_set_cache_inv(void); -void shmem_clear_cache_line_inv(void *dest); -void shmem_set_cache_line_inv(void *dest); -void shmem_udcflush(void); -void shmem_udcflush_line(void *dest); +void @\FuncDecl{shmem\_clear\_cache\_inv}@(void); +void @\FuncDecl{shmem\_set\_cache\_inv}@(void); +void @\FuncDecl{shmem\_clear\_cache\_line\_inv}@(void *dest); +void @\FuncDecl{shmem\_set\_cache\_line\_inv}@(void *dest); +void @\FuncDecl{shmem\_udcflush}@(void); +void @\FuncDecl{shmem\_udcflush\_line}@(void *dest); \end{Csynopsis} +\end{DeprecateBlock} +% N.B., The DeprecateBlock environment is not necessary here +% because the entire Fortran API is deprecated. \begin{Fsynopsis} -CALL SHMEM_CLEAR_CACHE_INV -CALL SHMEM_SET_CACHE_INV -CALL SHMEM_SET_CACHE_LINE_INV(dest) -CALL SHMEM_UDCFLUSH -CALL SHMEM_UDCFLUSH_LINE(dest) +CALL @\FuncDecl{SHMEM\_CLEAR\_CACHE\_INV}@ +CALL @\FuncDecl{SHMEM\_SET\_CACHE\_INV}@ +CALL @\FuncDecl{SHMEM\_SET\_CACHE\_LINE\_INV}@(dest) +CALL @\FuncDecl{SHMEM\_UDCFLUSH}@ +CALL @\FuncDecl{SHMEM\_UDCFLUSH\_LINE}@(dest) \end{Fsynopsis} \begin{apiarguments} \apiargument{IN}{dest}{A data object that is local to the \ac{PE}. \VAR{dest} - can be of any noncharacter type. If you are using \Fortran, it can be of any + can be of any noncharacter type. 
When using \Fortran, it can be of any kind.} \end{apiarguments} diff --git a/content/shmem_calloc.tex b/content/shmem_calloc.tex new file mode 100644 index 000000000..9da3ca18f --- /dev/null +++ b/content/shmem_calloc.tex @@ -0,0 +1,46 @@ +\apisummary{ + Allocate a zeroed block of symmetric memory. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void *@\FuncDecl{shmem\_calloc}@(size_t count, size_t size); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{count}{The number of elements to allocate.} + \apiargument{IN}{size}{The size in bytes of each element to allocate.} +\end{apiarguments} + + +\apidescription{ + The \FUNC{shmem\_calloc} routine is a collective operation that allocates a + region of remotely-accessible + memory for an array of \VAR{count} objects of \VAR{size} bytes each and + returns a pointer to the lowest byte address of the allocated symmetric + memory. The space is initialized to all bits zero. + + If the allocation succeeds, the pointer returned shall be suitably + aligned so that it may be assigned to a pointer to any type of object. + If the allocation does not succeed, or either \VAR{count} or \VAR{size} is + \CONST{0}, the return value is a null pointer. + + The values for \VAR{count} and \VAR{size} shall each be equal across + all \acp{PE} calling \FUNC{shmem\_calloc}; otherwise, the behavior is + undefined. + + The \FUNC{shmem\_calloc} routine calls a procedure that is semantically + equivalent to \FUNC{shmem\_barrier\_all} on exit. +} + +\apireturnvalues{ + The \FUNC{shmem\_calloc} routine returns a pointer to the lowest byte + address of the allocated space; otherwise, it returns a null pointer. +} + +\apinotes{ + None. 
+} + +\end{apidefinition} diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index b0b1e5d39..2ca75d491 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -1,91 +1,92 @@ \apisummary{ - Concatenates blocks of data from multiple \ac{PE}s to an array in every + Concatenates blocks of data from multiple \acp{PE} to an array in every \ac{PE}. } \begin{apidefinition} \begin{Csynopsis} -void shmem_collect32(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_collect64(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_fcollect32(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); -void shmem_fcollect64(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_collect32}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_collect64}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_fcollect32}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); +void @\FuncDecl{shmem\_fcollect64}@(void *dest, const void *source, size_t nelems, int PE_start, int logPE_stride, int PE_size, long *pSync); \end{Csynopsis} \begin{Fsynopsis} INTEGER nelems INTEGER PE_start, logPE_stride, PE_size INTEGER pSync(SHMEM_COLLECT_SYNC_SIZE) -CALL SHMEM_COLLECT4(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_COLLECT8(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_COLLECT32(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_COLLECT64(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT4(dest, 
source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT8(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT32(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) -CALL SHMEM_FCOLLECT64(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_COLLECT4}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_COLLECT8}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_COLLECT32}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_COLLECT64}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_FCOLLECT4}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_FCOLLECT8}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_FCOLLECT32}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) +CALL @\FuncDecl{SHMEM\_FCOLLECT64}@(dest, source, nelems, PE_start, logPE_stride, PE_size, pSync) \end{Fsynopsis} \begin{apiarguments} \apiargument{OUT}{dest}{A symmetric array. The \dest{} argument must be large enough - to accept the concatenation of the \source{} arrays on all \ac{PE}s. The data + to accept the concatenation of the \source{} arrays on all participating \acp{PE}. The data types are as follows: For \FUNC{shmem\_collect8}, \FUNC{shmem\_collect64}, \FUNC{shmem\_fcollect8}, and \FUNC{shmem\_fcollect64}, any data type with an - element size of 64 bits. \Fortran{} derived types, \Fortran{} character type, + element size of 64 bits. \Fortran derived types, \Fortran character type, and \CorCpp{} structures are not permitted. For \FUNC{shmem\_collect4}, \FUNC{shmem\_collect32}, \FUNC{shmem\_fcollect4}, and \FUNC{shmem\_fcollect32}, - any data type with an element size of \CONST{32} bits. 
\Fortran{} derived - types, \Fortran{} character type, and \CorCpp{} structures are not permitted.} + any data type with an element size of \CONST{32} bits. \Fortran derived + types, \Fortran character type, and \CorCpp{} structures are not permitted.} \apiargument{IN}{source}{A symmetric data object that can be of any type permissible for the \dest{} argument.} \apiargument{IN}{nelems}{The number of elements in the \source{} array. \VAR{nelems} - must be of type \VAR{size\_t} for \Clang. If you are using \Fortran, it must be + must be of type \VAR{size\_t} for \Cstd. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of - \ac{PE}s. \VAR{PE\_start} must be of type integer. If you are using \Fortran, +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of + \acp{PE}. \VAR{PE\_start} must be of type integer. When using \Fortran, it must be a default integer value.} \apiargument{IN}{logPE\_stride}{The log (base \CONST{2}) of the stride between - consecutive \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of - type integer. If you are using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_size}{The number of \ac{PE}s in the \activeset. \VAR{PE\_size} - must be of type integer. If you are using \Fortran, it must be a default + consecutive \ac{PE} numbers in the active set. \VAR{logPE\_stride} must be of + type integer. When using \Fortran, it must be a default integer value.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set. \VAR{PE\_size} + must be of type integer. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must be of - type long and size \CONST{SHMEM\_COLLECT\_SYNC\_SIZE}. In \Fortran, - \VAR{pSync} must be of type integer and size \CONST{SHMEM\_COLLECT\_SYNC\_SIZE}. 
- If you are using \Fortran, it must be a default integer value. Every element of - this array must be initialized with the value \CONST{SHMEM\_SYNC\_VALUE} in - \CorCpp{} or \CONST{SHMEM\_SYNC\_VALUE} in \Fortran{} before any of the \ac{PE}s - in the \activeset{} enter \FUNC{shmem\_collect} or \FUNC{shmem\_fcollect}.} +\apiargument{IN}{pSync}{ + A symmetric work array of size \CONST{SHMEM\_COLLECT\_SYNC\_SIZE}. + In \CorCpp, \VAR{pSync} must be an array of elements of type \CTYPE{long}. + In \Fortran, \VAR{pSync} must be an array of elements of default integer type. + Every element of this array must be initialized with the value + \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set + enter \FUNC{shmem\_collect} or \FUNC{shmem\_fcollect}.} \end{apiarguments} \apidescription{ - \OSH{} \FUNC{collect} and \FUNC{fcollect} routines concatenate \VAR{nelems} + \openshmem \FUNC{collect} and \FUNC{fcollect} routines concatenate \VAR{nelems} \CONST{64}-bit or \CONST{32}-bit data items from the \source{} array into the - \dest{} array, over the set of \ac{PE}s defined by \VAR{PE\_start}, + \dest{} array, over the set of \acp{PE} defined by \VAR{PE\_start}, \VAR{log2PE\_stride}, and \VAR{PE\_size}, in processor number order. The resultant \dest{} array contains the contribution from \ac{PE} \VAR{PE\_start} first, then the contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. The collected result is written to the \dest{} array for all - \ac{PE}s in the \activeset. + \acp{PE} in the active set. The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all - participating \ac{PE}s, while the \FUNC{collect} routines allow \VAR{nelems} to + participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to vary from \ac{PE} to \ac{PE}. As with all \openshmem collective routines, each of these routines assumes that - only \ac{PE}s in the \activeset{} call the routine. 
If a \ac{PE} not in the - \activeset{} and calls this collective routine, the behavior is undefined. + only \acp{PE} in the active set call the routine. If a \ac{PE} not in the + active set calls this collective routine, the behavior is undefined. The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} - must be equal on all \ac{PE}s in the \activeset. The same \dest{} and \source{} - arrays and the same \VAR{pSync} work array must be passed to all \ac{PE}s in the - \activeset. + must be the same value on all \acp{PE} in the active set. The same \dest{} and \source{} + arrays and the same \VAR{pSync} work array must be passed by all \acp{PE} in the + active set. Upon return from a collective routine, the following are true for the local - \ac{PE}: The \dest{} array is updated. The values in the \VAR{pSync} array are + \ac{PE}: The \dest{} array is updated and the \source{} array may be safely reused. + The values in the \VAR{pSync} array are restored to the original values. } @@ -98,15 +99,15 @@ return, so a particular \VAR{pSync} buffer need only be initialized the first time it is used. - You must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} - in the \activeset{} while any of the \ac{PE}s participate in processing of an + The user must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} + in the active set while any of the \acp{PE} participate in processing of an \openshmem collective routine. Be careful to avoid these situations: If the \VAR{pSync} array is initialized at run time, some type of synchronization is - needed to ensure that all \ac{PE}s in the working set have initialized + needed to ensure that all \acp{PE} in the working set have initialized \VAR{pSync} before any of them enter an \openshmem routine called with the \VAR{pSync} synchronization array.
A \VAR{pSync} array can be reused on a - subsequent \openshmem collective routine only if none of the \ac{PE}s in the - \activeset{} are still processing a prior \openshmem collective routine call + subsequent \openshmem collective routine only if none of the \acp{PE} in the + active set are still processing a prior \openshmem collective routine call that used the same \VAR{pSync} array. In general, this may be ensured only by doing some type of synchronization. @@ -118,12 +119,12 @@ \begin{apiexamples} \apicexample - {The following \FUNC{shmem\_collec}t example is for \CorCpp{} programs:} + {The following \FUNC{shmem\_collect} example is for \CorCpp{} programs:} {./example_code/shmem_collect_example.c} {} \apifexample - {The following \FUNC{SHMEM\_COLLECT} example is for \Fortran{} programs:} + {The following \FUNC{SHMEM\_COLLECT} example is for \Fortran programs:} {./example_code/shmem_collect_example.f90} {} diff --git a/content/shmem_ctx_create.tex b/content/shmem_ctx_create.tex new file mode 100644 index 000000000..7211b4547 --- /dev/null +++ b/content/shmem_ctx_create.tex @@ -0,0 +1,66 @@ +\apisummary{ + Create a communication context. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmem\_ctx\_create}@(long options, shmem_ctx_t *ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{options}{The set of options requested for the given context. + Multiple options may be requested by combining them with a bitwise + OR operation; otherwise, \CONST{0} can be given if no options are + requested.} + \apiargument{OUT}{ctx}{A handle to the newly created context.} +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_ctx\_create} routine creates a new communication context + and returns its handle through the \VAR{ctx} argument. If the context was + created successfully, a value of zero is returned; otherwise, a nonzero + value is returned. 
An unsuccessful context + creation call is not treated as an error and the \openshmem library remains + in a correct state. The creation call can be reattempted with different + options or after additional resources become available. + + By default, contexts are {\em shareable} and, when it is allowed by the + threading model provided by the \openshmem library, they can be used concurrently by + multiple threads within the PE where they were created. + % + The following options can be supplied during context creation to restrict + this usage model and enable performance optimizations. When using a given + context, the application must comply with the requirements of all options + set on that context; otherwise, the behavior is undefined. + No options are enabled on the default context. + + \apitablerow{\LibConstRef{SHMEM\_CTX\_SERIALIZED}}{ + The given context is shareable; however, it will not be used by multiple threads + concurrently. When the \CONST{SHMEM\_CTX\_SERIALIZED} option is + set, the user must ensure that operations involving the given + context are serialized by the application.} + + \apitablerow{\LibConstRef{SHMEM\_CTX\_PRIVATE}}{ + The given context will be used only by the thread that created it.} + + \apitablerow{\LibConstRef{SHMEM\_CTX\_NOSTORE}}{ + Quiet and fence operations performed on the given context are not + required to enforce completion and ordering of memory store + operations. + When ordering of store operations is needed, the application must + perform a synchronization operation on a context without the + \CONST{SHMEM\_CTX\_NOSTORE} option enabled.} + +} + +\apireturnvalues{ + Zero on success and nonzero otherwise. +} + +\apinotes{ + None. +} + +\end{apidefinition} + diff --git a/content/shmem_ctx_destroy.tex b/content/shmem_ctx_destroy.tex new file mode 100644 index 000000000..287c5bcdd --- /dev/null +++ b/content/shmem_ctx_destroy.tex @@ -0,0 +1,64 @@ +\apisummary{ + Destroy a communication context. 
+} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_ctx\_destroy}@(shmem_ctx_t ctx); +\end{Csynopsis} + +\begin{apiarguments} + \apiargument{IN}{ctx}{Handle to the context that will be destroyed.} +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_ctx\_destroy} destroys a context that was created by a call to + \FUNC{shmem\_ctx\_create}. It is the user's responsibility to ensure that + the context is not used after it has been destroyed, for example when the + destroyed context is used by multiple threads. This function + performs an implicit quiet operation on the given context before it is freed. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + It is invalid to pass \CONST{SHMEM\_CTX\_DEFAULT} to this routine. + + Destroying a context makes it impossible for the user to complete + communication operations that are pending on that context. This includes + nonblocking communication operations, whose local buffers are only returned + to the user after the operations have been completed. An implicit quiet is + performed when freeing a context to avoid this ambiguity. + + A context with the \CONST{SHMEM\_CTX\_PRIVATE} option enabled must be + destroyed by the thread that created it. +} + +\begin{apiexamples} + + \apicexample + {The following example demonstrates the use of contexts in a multithreaded + \Cstd[11] program that uses OpenMP for threading. This example shows the + shared counter load balancing method and illustrates the use of contexts + for thread isolation.} + {./example_code/shmem_ctx.c} + {} + + \apicexample + {The following example demonstrates the use of contexts in a + single-threaded \Cstd[11] program that performs a summation reduction where + the data contained in the \VAR{in\_buf} arrays on all \acp{PE} is reduced into + the \VAR{out\_buf} arrays on all \acp{PE}. The buffers are divided into + segments and processing of the segments is pipelined. 
Contexts are used + to overlap an all-to-all exchange of data for segment \VAR{p} with the + local reduction of segment \VAR{p-1}.} + {./example_code/shmem_ctx_pipelined_reduce.c} + {} + +\end{apiexamples} + +\end{apidefinition} + diff --git a/content/shmem_fence.tex b/content/shmem_fence.tex index 967b730af..f86b83c3a 100644 --- a/content/shmem_fence.tex +++ b/content/shmem_fence.tex @@ -1,30 +1,34 @@ \apisummary{ - Assures ordering of delivery of \PUT{}, \acp{AMO}, and memory store routines + Assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data objects. } \begin{apidefinition} \begin{Csynopsis} -void shmem_fence(void); +void @\FuncDecl{shmem\_fence}@(void); +void @\FuncDecl{shmem\_ctx\_fence}@(shmem_ctx_t ctx); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_FENCE +CALL @\FuncDecl{SHMEM\_FENCE}@ \end{Fsynopsis} \begin{apiarguments} -\apiargument{None.}{}{} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \end{apiarguments} \apidescription{ - This routine assures ordering of delivery of \PUT{}, \acp{AMO}, and memory store - routines to symmetric data objects. All \PUT{}, \acp{AMO}, and memory store - routines to symmetric data objects issued to a particular remote \ac{PE} prior + This routine assures ordering of delivery of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} + routines to symmetric data objects. All \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} + routines to symmetric data objects issued to a particular remote \ac{PE} + on the given context prior to the call to \FUNC{shmem\_fence} are guaranteed to be delivered before any - subsequent \PUT{}, \acp{AMO}, and memory store routines to symmetric data + subsequent \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data objects to the same \ac{PE}. 
\FUNC{shmem\_fence} guarantees order of delivery, - not completion. + not completion. It does not guarantee order of delivery of nonblocking \GET{} routines. } \apireturnvalues{ @@ -32,22 +36,37 @@ } \apinotes{ - \FUNC{shmem\_fence} only provides per-\ac{PE} ordering guarantees and does - not guarantee completion of delivery. There is a subtle difference between + \FUNC{shmem\_fence} only provides per-\ac{PE} ordering guarantees and does not + guarantee completion of delivery. + \FUNC{shmem\_fence} also does not have an effect on the ordering between memory + accesses issued by the target PE. The \FUNC{shmem\_wait\_until}, \FUNC{shmem\_test}, + \FUNC{shmem\_barrier}, and \FUNC{shmem\_barrier\_all} routines can be called by the target PE to guarantee + ordering of its memory accesses. + There is a subtle difference between \FUNC{shmem\_fence} and \FUNC{shmem\_quiet}, in that, \FUNC{shmem\_quiet} - guarantees completion of \PUT{}, \acp{AMO}, and memory store routines to + guarantees completion of \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data objects which makes the updates visible to all other \acp{PE}. - The \FUNC{shmem\_quiet} routine should be called if completion of PUT{}, - \acp{AMO}, and memory store routines to symmetric data objects is desired - when multiple remote \ac{PE}s are involved. + The \FUNC{shmem\_quiet} routine should be called if completion of \PUT{}, + \ac{AMO}, memory store, and nonblocking \PUT{} routines to symmetric data objects is desired + when multiple remote \acp{PE} are involved. + + In an \openshmem program with multithreaded \acp{PE}, it is the + user's responsibility to ensure ordering between operations issued by the threads + in a \ac{PE} that target symmetric memory (e.g., \PUT{}, \ac{AMO}, memory stores, + and nonblocking routines) and calls by threads in that \ac{PE} to + \FUNC{shmem\_fence}. The \FUNC{shmem\_fence} routine can enforce memory store ordering only for the + calling thread. 
Thus, to ensure ordering for memory stores performed by a thread that is + not the thread calling \FUNC{shmem\_fence}, the update must be made visible to the + calling thread according to the rules of the memory model associated with + the threading environment. } \begin{apiexamples} \apicexample - {The following \FUNC{shmem\_fence} example is for \CorCpp{} programs: } + {The following example uses \FUNC{shmem\_fence} in a \Cstd[11] program: } {./example_code/shmem_fence_example.c} {\VAR{Put1} will be ordered to be delivered before \VAR{put3} and \VAR{put2} will be ordered to be delivered before \VAR{put4}.} diff --git a/content/shmem_fetch.tex b/content/shmem_fetch.tex deleted file mode 100644 index 600232e05..000000000 --- a/content/shmem_fetch.tex +++ /dev/null @@ -1,53 +0,0 @@ -\apisummary{ - Atomically fetches the value of a remote data object. -} - -\begin{apidefinition} - -\begin{C11synopsis} -TYPE shmem_fetch(const TYPE *dest, int pe); -\end{C11synopsis} -where \TYPE{} is one of the extended \ac{AMO} types specified by Table \ref{extamotypes}. - -\begin{Csynopsis} -TYPE shmem__fetch(const TYPE *dest, int pe); -\end{Csynopsis} -where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{extamotypes}. - -\begin{Fsynopsis} -INTEGER pe -INTEGER*4 SHMEM_INT4_FETCH, ires_i4 -ires_i4 = SHMEM_INT4_FETCH(dest, pe) -INTEGER*8 SHMEM_INT8_FETCH, ires_i8 -ires_i8 = SHMEM_INT8_FETCH(dest, pe) -REAL*4 SHMEM_REAL4_FETCH, res_r4 -res_r4 = SHMEM_REAL4_FETCH(dest, pe) -REAL*8 SHMEM_REAL8_FETCH, res_r8 -res_r8 = SHMEM_REAL8_FETCH(dest, pe) -\end{Fsynopsis} - -\begin{apiarguments} - -\apiargument{IN}{dest}{The remotely accessible data object to be fetched from - the remote \ac{PE}.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number from which - \VAR{dest} is to be fetched.} - -\end{apiarguments} - -\apidescription{ - \FUNC{shmem\_fetch} performs an atomic fetch operation. 
It returns the - contents of the \VAR{dest} as an atomic operation. -} - -\apireturnvalues{ - The contents at the \VAR{dest} address on the remote \ac{PE}. - The data type of the return value is the same as the the type of - the remote data object. -} - -\apinotes{ - None. -} - -\end{apidefinition} diff --git a/content/shmem_finalize.tex b/content/shmem_finalize.tex index cf0ab98b1..a65348fa2 100644 --- a/content/shmem_finalize.tex +++ b/content/shmem_finalize.tex @@ -1,5 +1,5 @@ \apisummary{ - A collective operation that releases resources used by the \openshmem + A collective operation that releases all resources used by the \openshmem library. This only terminates the \openshmem portion of a program, not the entire program. } @@ -7,11 +7,11 @@ \begin{apidefinition} \begin{Csynopsis} -void shmem_finalize(void); +void @\FuncDecl{shmem\_finalize}@(void); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_FINALIZE() +CALL @\FuncDecl{SHMEM\_FINALIZE}@() \end{Fsynopsis} \begin{apiarguments} @@ -20,18 +20,23 @@ \apidescription{ \FUNC{shmem\_finalize} is a collective operation that ends the \openshmem - portion of a program previously initialized by \FUNC{shmem\_init} and - releases resources used by the \openshmem library. This collective + portion of a program previously initialized by \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} and + releases all resources used by the \openshmem library. This collective operation requires all \acp{PE} to participate in the call. There is an - implicit global barrier in \FUNC{shmem\_finalize} so that pending - communications are completed, and no resources can be released until all - \acp{PE} have entered \FUNC{shmem\_finalize}. \FUNC{shmem\_finalize} must be + implicit global barrier in \FUNC{shmem\_finalize} to ensure that pending + communications are completed and that no resources are released until all + \acp{PE} have entered \FUNC{shmem\_finalize}. + This routine destroys all shareable contexts. 
The user is + responsible for destroying all contexts with the + \CONST{SHMEM\_CTX\_PRIVATE} option enabled prior to calling this routine; + otherwise, the behavior is undefined. + \FUNC{shmem\_finalize} must be the last \openshmem library call encountered in the \openshmem portion of a - program. A call to \FUNC{shmem\_finalize} will release any resources - initialized by a corresponding call to \FUNC{shmem\_init}. All processes + program. A call to \FUNC{shmem\_finalize} will release all resources + initialized by a corresponding call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread}. All processes that represent the \acp{PE} will still exist after the call to \FUNC{shmem\_finalize} returns, but they will no longer have access - to any resources that have been released. + to resources that have been released. } \apireturnvalues{ @@ -44,17 +49,13 @@ \FUNC{shmem\_ptr}. This collective operation requires all \acp{PE} to participate in the call, not just a subset of the \acp{PE}. The non-\openshmem portion of a program may continue after a call to - \FUNC{shmem\_finalize} by all \acp{PE}. There is an implicit - \FUNC{shmem\_finalize} at the end of main, so that having an explicit call - to \FUNC{shmem\_finalize} is optional. However, an explicit - \FUNC{shmem\_finalize} may be required as an entry point for wrappers used - by profiling or other tools that need to perform their own finalization. + \FUNC{shmem\_finalize} by all \acp{PE}. } \begin{apiexamples} \apicexample - {The following finalize example is for \CorCpp{} programs:} + {The following finalize example is for \Cstd[11] programs:} {./example_code/shmem_finalize_example.c} {} diff --git a/content/shmem_finc.tex b/content/shmem_finc.tex deleted file mode 100644 index afaf3b180..000000000 --- a/content/shmem_finc.tex +++ /dev/null @@ -1,69 +0,0 @@ -\apisummary{ - Performs an atomic fetch-and-increment operation on a remote data object. 
-} - -\begin{apidefinition} - -\begin{C11synopsis} -TYPE shmem_finc(TYPE *dest, int pe); -\end{C11synopsis} -where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. - -\begin{Csynopsis} -TYPE shmem__finc(TYPE *dest, int pe); -\end{Csynopsis} -where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. - -\begin{Fsynopsis} -INTEGER pe -INTEGER*4 SHMEM_INT4_FINC, ires_i4 -ires_i4 = SHMEM_INT4_FINC(dest, pe) -INTEGER*8 SHMEM_INT8_FINC, ires_i8 -ires_i8 = SHMEM_INT8_FINC(dest, pe) -\end{Fsynopsis} - - -\begin{apiarguments} - -\apiargument{IN}{dest}{The remotely accessible integer data object to be updated - on the remote \ac{PE}. The type of \dest{} should match that implied in the - SYNOPSIS section.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \dest{} is to be updated. If you are using \Fortran, it must be a default - integer value.} - -\end{apiarguments} - - -\apidescription{ - These routines perform a fetch-and-increment operation. The \dest{} on - \ac{PE} \VAR{pe} is increased by one and the routine returns the previous - contents of \dest{} as an atomic operation. -} - -\apidesctable{ - If you are using \Fortran, \VAR{dest} must be of the following type: -}{Routine}{Data type of \VAR{dest} and \VAR{source}} - -\apitablerow{SHMEM\_INT4\_FINC}{\CONST{4}-byte integer} -\apitablerow{SHMEM\_INT8\_FINC}{\CONST{8}-byte integer} - -\apireturnvalues{ - The contents that had been at the \dest{} address on the remote \ac{PE} prior to - the increment. The data type of the return value is the same as the \dest. -} - -\apinotes{ - None. 
-} - -\begin{apiexamples} - -\apicexample - {The following \FUNC{shmem\_finc} example is for \CorCpp{} programs:} - {./example_code/shmem_finc_example.c} - {} - -\end{apiexamples} - -\end{apidefinition} diff --git a/content/shmem_g.tex b/content/shmem_g.tex index 40b942d3b..89b3f84b9 100644 --- a/content/shmem_g.tex +++ b/content/shmem_g.tex @@ -5,18 +5,23 @@ \begin{apidefinition} \begin{C11synopsis} -TYPE shmem_g(const TYPE *addr, int pe); +TYPE @\FuncDecl{shmem\_g}@(const TYPE *source, int pe); +TYPE @\FuncDecl{shmem\_g}@(shmem_ctx_t ctx, const TYPE *source, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -TYPE shmem__g(const TYPE *addr, int pe); +TYPE @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_g}@(const TYPE *source, int pe); +TYPE @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_g}@(shmem_ctx_t ctx, const TYPE *source, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{apiarguments} - \apiargument{IN}{addr}{The remotely accessible array element or scalar data object.} - \apiargument{IN}{pe}{The number of the remote \ac{PE} on which \VAR{addr} resides.} + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{IN}{source}{The remotely accessible array element or scalar data object.} + \apiargument{IN}{pe}{The number of the remote \ac{PE} on which \VAR{source} resides.} \end{apiarguments} \apidescription{ @@ -35,7 +40,7 @@ \begin{apiexamples} \apicexample - {The following \FUNC{shmem\_g} example is for \CorCpp{} programs:} + {The following \FUNC{shmem\_g} example is for \Cstd[11] programs:} {./example_code/shmem_g_example.c} {} \end{apiexamples} diff --git a/content/shmem_get.tex b/content/shmem_get.tex index deab3b5bb..2f567969b 100644 --- a/content/shmem_get.tex +++ b/content/shmem_get.tex @@ -5,50 +5,58 @@ \begin{apidefinition} \begin{C11synopsis} -void shmem_get(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_get}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_get}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void shmem__get(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_get}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_get}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void shmem_get(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_get\FuncParam{SIZE}}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_get\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void shmem_getmem(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_getmem}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_getmem}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} \begin{Fsynopsis} INTEGER nelems, pe -CALL SHMEM_CHARACTER_GET(dest, source, nelems, pe) -CALL SHMEM_COMPLEX_GET(dest, source, nelems, pe) -CALL SHMEM_DOUBLE_GET(dest, source, nelems, pe) -CALL SHMEM_GET4(dest, source, nelems, pe) -CALL SHMEM_GET8(dest, source, nelems, pe) -CALL SHMEM_GET32(dest, source, nelems, pe) -CALL SHMEM_GET128(dest, source, nelems, pe) -CALL SHMEM_GETMEM(dest, source, nelems, pe) -CALL SHMEM_INTEGER_GET(dest, source, nelems, pe) -CALL SHMEM_LOGICAL_GET(dest, source, nelems, pe) -CALL SHMEM_REAL_GET(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_CHARACTER\_GET}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_COMPLEX\_GET}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_DOUBLE\_GET}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET4}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET8}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET32}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET64}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET128}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GETMEM}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_INTEGER\_GET}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_LOGICAL\_GET}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_REAL\_GET}@(dest, source, nelems, pe) \end{Fsynopsis} \begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \apiargument{OUT}{dest}{Local data object to be updated.} \apiargument{IN}{source}{Data object on the \ac{PE} identified by \VAR{pe} that contains the data to be copied. 
This data object must be remotely accessible.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must - be of type integer. If you are using \Fortran, it must be a constant, + be of type integer. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \end{apiarguments} @@ -64,11 +72,11 @@ which are as follows: }{Routine}{Data type of \VAR{dest} and \VAR{source}} - \apitablerow{shmem\_getmem}{\Fortran: Any noncharacter type. \Clang: Any + \apitablerow{shmem\_getmem}{\Fortran: Any noncharacter type. \Cstd: Any data type. nelems is scaled in bytes.} \apitablerow{shmem\_get4, shmem\_get32}{Any noncharacter type that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_get8}{\Clang: Any noncharacter type that + \apitablerow{shmem\_get8}{\Cstd: Any noncharacter type that has a storage size equal to \CONST{8} bits.} \apitablerow{}{\Fortran: Any noncharacter type that has a storage size equal to \CONST{64} bits.} @@ -93,7 +101,7 @@ \apinotes{ See Section \ref{subsec:memory_model} for a definition of the term remotely accessible. - If you are using \Fortran, data types must be of default size. For example, a real + When using \Fortran, data types must be of default size. For example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4}, or \CONST{REAL(KIND=KIND(1.0))}. 
} diff --git a/content/shmem_get_nbi.tex b/content/shmem_get_nbi.tex index 835eab557..0f7f1f24f 100644 --- a/content/shmem_get_nbi.tex +++ b/content/shmem_get_nbi.tex @@ -6,50 +6,58 @@ \begin{apidefinition} \begin{C11synopsis} -void shmem_get_nbi(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_get\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_get\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void shmem__get_nbi(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_get\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_get\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{CsynopsisCol} -void shmem_get_nbi(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_get\FuncParam{SIZE}\_nbi}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_get\FuncParam{SIZE}\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. 
\begin{CsynopsisCol} -void shmem_getmem_nbi(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_getmem\_nbi}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_getmem\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} \begin{Fsynopsis} INTEGER nelems, pe -CALL SHMEM_CHARACTER_GET_NBI(dest, source, nelems, pe) -CALL SHMEM_COMPLEX_GET_NBI(dest, source, nelems, pe) -CALL SHMEM_DOUBLE_GET_NBI(dest, source, nelems, pe) -CALL SHMEM_GET4_NBI(dest, source, nelems, pe) -CALL SHMEM_GET8_NBI(dest, source, nelems, pe) -CALL SHMEM_GET32_NBI(dest, source, nelems, pe) -CALL SHMEM_GET128_NBI(dest, source, nelems, pe) -CALL SHMEM_GETMEM_NBI(dest, source, nelems, pe) -CALL SHMEM_INTEGER_GET_NBI(dest, source, nelems, pe) -CALL SHMEM_LOGICAL_GET_NBI(dest, source, nelems, pe) -CALL SHMEM_REAL_GET_NBI(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_CHARACTER\_GET\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_COMPLEX\_GET\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_DOUBLE\_GET\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET4\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET8\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET32\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET64\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GET128\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_GETMEM\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_INTEGER\_GET\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_LOGICAL\_GET\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_REAL\_GET\_NBI}@(dest, source, nelems, pe) \end{Fsynopsis} \begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \apiargument{OUT}{dest}{Local data object to be updated.} \apiargument{IN}{source}{Data object on the \ac{PE} identified by \VAR{pe} that contains the data to be copied. This data object must be remotely accessible.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must - be of type integer. If you are using \Fortran, it must be a constant, + be of type integer. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \end{apiarguments} @@ -67,11 +75,11 @@ which are as follows: }{Routine}{Data type of \VAR{dest} and \VAR{source}} - \apitablerow{shmem\_getmem\_nbi}{\Fortran: Any noncharacter type. \Clang: + \apitablerow{shmem\_getmem\_nbi}{\Fortran: Any noncharacter type. \Cstd: Any data type. nelems is scaled in bytes.} \apitablerow{shmem\_get4\_nbi, shmem\_get32\_nbi}{Any noncharacter type that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_get8\_nbi}{\Clang: Any noncharacter type that + \apitablerow{shmem\_get8\_nbi}{\Cstd: Any noncharacter type that has a storage size equal to \CONST{8} bits.} \apitablerow{}{\Fortran: Any noncharacter type that has a storage size equal to \CONST{64} bits.} @@ -96,7 +104,7 @@ \apinotes{ See Section \ref{subsec:memory_model} for a definition of the term remotely accessible. - If you are using \Fortran, data types must be of default size. For example, a real + When using \Fortran, data types must be of default size. For example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4}, or \CONST{REAL(KIND=KIND(1.0))}. 
} diff --git a/content/shmem_global_exit.tex b/content/shmem_global_exit.tex index 7ed12f9cc..577c952c8 100644 --- a/content/shmem_global_exit.tex +++ b/content/shmem_global_exit.tex @@ -4,13 +4,17 @@ \begin{apidefinition} +\begin{C11synopsis} +_Noreturn void @\FuncDecl{shmem\_global\_exit}@(int status); +\end{C11synopsis} + \begin{Csynopsis} -void shmem_global_exit(int status); +void @\FuncDecl{shmem\_global\_exit}@(int status); \end{Csynopsis} \begin{Fsynopsis} INTEGER STATUS -CALL SHMEM_GLOBAL_EXIT(status) +CALL @\FuncDecl{SHMEM\_GLOBAL\_EXIT}@(status) \end{Fsynopsis} \begin{apiarguments} @@ -24,7 +28,7 @@ the entire program, not just the \openshmem portion. When any \ac{PE} calls \FUNC{shmem\_global\_exit}, it results in the immediate notification to all \acp{PE} to terminate. \FUNC{shmem\_global\_exit} flushes I/O and releases - resources in accordance with C/C++/Fortran language requirements for normal + resources in accordance with \CorCppFor language requirements for normal program termination. If more than one \ac{PE} calls \FUNC{shmem\_global\_exit}, then the exit status returned to the environment shall be one of the values passed to \FUNC{shmem\_global\_exit} as the @@ -42,14 +46,15 @@ \FUNC{shmem\_global\_exit} may be used in situations where one or more \acp{PE} have determined that the program has completed and/or should terminate early. Accordingly, the integer status argument can be used to - pass any information about the nature of the exit, e.g an encountered error - or a found solution. Since \FUNC{shmem\_global\_exit} is a non-collective + pass any information about the nature of the exit; e.g., that the program + encountered an error or found a solution. + Since \FUNC{shmem\_global\_exit} is a non-collective routine, there is no implied synchronization, and all \acp{PE} must terminate regardless of their current execution state. 
While I/O must be - flushed for standard language I/O calls from C/C++/Fortran, it is - implementation dependent as to how I/O done by other means (e.g. third + flushed for standard language I/O calls from \CorCppFor, it is + implementation dependent as to how I/O done by other means (e.g., third party I/O libraries) is handled. Similarly, resources are released - according to C/C++/Fortran standard language requirements, but this may not + according to \CorCppFor standard language requirements, but this may not include all resources allocated for the \openshmem program. However, a quality implementation will make a best effort to flush all I/O and clean up all resources. diff --git a/content/shmem_iget.tex b/content/shmem_iget.tex index e36c93ef5..c4ce98e90 100644 --- a/content/shmem_iget.tex +++ b/content/shmem_iget.tex @@ -5,53 +5,59 @@ \begin{apidefinition} \begin{C11synopsis} -void shmem_iget(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_iget}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_iget}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void shmem__iget(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_iget}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_iget}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
\begin{CsynopsisCol} -void shmem_iget(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_iget\FuncParam{SIZE}}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_iget\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. \begin{Fsynopsis} INTEGER dst, sst, nelems, pe -CALL SHMEM_COMPLEX_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_DOUBLE_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET4(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET8(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET32(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET64(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IGET128(dest, source, dst, sst, nelems, pe) -CALL SHMEM_INTEGER_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_LOGICAL_IGET(dest, source, dst, sst, nelems, pe) -CALL SHMEM_REAL_IGET(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_COMPLEX\_IGET}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_DOUBLE\_IGET}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IGET4}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IGET8}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IGET32}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IGET64}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IGET128}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_INTEGER\_IGET}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_LOGICAL\_IGET}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_REAL\_IGET}@(dest, source, dst, sst, nelems, pe) \end{Fsynopsis} \begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \apiargument{OUT}{dest}{Array to be updated on the local \ac{PE}. } \apiargument{IN}{source}{Array containing the data to be copied on the remote \ac{PE}.} \apiargument{IN}{dst}{The stride between consecutive elements of the \dest{} array. The stride is scaled by the element size of the \dest{} array. A value of \CONST{1} indicates contiguous data. \VAR{dst} must be of - type \textit{ptrdiff\_t}. If you are calling from \Fortran, it must + type \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} \apiargument{IN}{sst}{The stride between consecutive elements of the \source{} array. The stride is scaled by the element size of the \source{} array. A value of \CONST{1} indicates contiguous data. \VAR{sst} must be - of type \textit{ptrdiff\_t}. If you are calling from \Fortran, it must + of type \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are + arrays. \VAR{nelems} must be of type \CTYPE{size\_t} for \Cstd. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be - of type integer.
When using \Fortran, it must be a constant, variable, or array element of default integer type.} \end{apiarguments} @@ -68,7 +74,7 @@ {Routine}{Data type of \VAR{dest} and \VAR{source}} \apitablerow{shmem\_iget4, shmem\_iget32}{Any noncharacter type that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_iget8}{\Clang: Any noncharacter type that + \apitablerow{shmem\_iget8}{\Cstd: Any noncharacter type that has a storage size equal to \CONST{8} bits.} \apitablerow{}{\Fortran: Any noncharacter type that has a storage size equal to \CONST{64} bits.} @@ -87,7 +93,7 @@ } \apinotes{ - If you are using \Fortran, data types must be of default size. For example, a + When using \Fortran, data types must be of default size. For example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4}, or \CONST{REAL(KIND=KIND(1.0))}. } @@ -95,7 +101,7 @@ \begin{apiexamples} \apifexample - {The following example uses \FUNC{shmem\_logical\_iget} in a \Fortran{} + {The following example uses \FUNC{shmem\_logical\_iget} in a \Fortran program.} {./example_code/shmem_iget_example.f90} {} diff --git a/content/shmem_inc.tex b/content/shmem_inc.tex deleted file mode 100644 index 239247e1d..000000000 --- a/content/shmem_inc.tex +++ /dev/null @@ -1,64 +0,0 @@ -\apisummary{ - Performs an atomic increment operation on a remote data object. -} - -\begin{apidefinition} - -\begin{C11synopsis} -void shmem_inc(TYPE *dest, int pe); -\end{C11synopsis} -where \TYPE{} is one of the standard \ac{AMO} types specified by Table \ref{stdamotypes}. - -\begin{Csynopsis} -void shmem__inc(TYPE *dest, int pe); -\end{Csynopsis} -where \TYPE{} is one of the standard \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{stdamotypes}. 
- -\begin{Fsynopsis} -INTEGER pe -CALL SHMEM_INT4_INC(dest, pe) -CALL SHMEM_INT8_INC(dest, pe) -\end{Fsynopsis} - -\begin{apiarguments} - -\apiargument{IN}{dest}{The remotely accessible integer data object to be updated - on the remote \ac{PE}. The type of \dest{} should match that implied in the - SYNOPSIS section.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \dest{} is to be updated. If you are using \Fortran{}, it must be a default - integer value.} - -\end{apiarguments} - -\apidescription{ - These routines perform an atomic increment operation on the \VAR{dest} data - object on \ac{PE}. -} - - -\apidesctable{ - If you are using \Fortran, \VAR{dest} must be of the following type: -}{Routine}{Data type of \VAR{dest} and \VAR{source}} - -\apitablerow{SHMEM\_INT4\_INC}{\CONST{4}-byte integer} -\apitablerow{SHMEM\_INT8\_INC}{\CONST{8}-byte integer} - -\apireturnvalues{ - None. -} - -\apinotes{ - The term remotely accessible is defined in Section \ref{subsec:memory_model}. -} - -\begin{apiexamples} - -\apicexample - { The following \FUNC{shmem\_inc} example is for \CorCpp{} programs: } - {./example_code/shmem_inc_example.c} - {} - -\end{apiexamples} - -\end{apidefinition} diff --git a/content/shmem_info_get_name.tex b/content/shmem_info_get_name.tex index c608ae046..dd1d5ab8b 100644 --- a/content/shmem_info_get_name.tex +++ b/content/shmem_info_get_name.tex @@ -1,16 +1,17 @@ \apisummary{ - This routine returns the vendor defined character string. + This routine returns the vendor defined name string that is consistent + with the library constant \CONST{SHMEM\_VENDOR\_STRING}. 
} \begin{apidefinition} \begin{Csynopsis} -void shmem_info_get_name(char *name); +void @\FuncDecl{shmem\_info\_get\_name}@(char *name); \end{Csynopsis} \begin{Fsynopsis} CHARACTER *(*)NAME -SHMEM_INFO_GET_NAME(NAME) +CALL @\FuncDecl{SHMEM\_INFO\_GET\_NAME}@(NAME) \end{Fsynopsis} \begin{apiarguments} @@ -18,18 +19,18 @@ \end{apiarguments} \apidescription{ - This routine returns the vendor defined character string of size defined by - the constant SHMEM\_MAX\_NAME\_LEN. The program calling this function - prepares the memory of size SHMEM\_MAX\_NAME\_LEN, and the implementation - copies the string of size at most SHMEM\_MAX\_NAME\_LEN. In C, the string is - terminated by a null character. In Fortran, the string of size less than - SHMEM\_MAX\_NAME\_LEN is padded with blank characters up to size - SHMEM\_MAX\_NAME\_LEN. The implementation copying a string of size greater - than SHMEM\_MAX\_NAME\_LEN results in an undefined behavior. Multiple - invocations of the routine in an \openshmem{} program always return the - same string. For a given library implementation, the major and minor - version returned by these calls is consistent with the compile-time - constants defined in its shmem.h. + This routine returns the vendor defined name string of size defined by + the library constant \CONST{SHMEM\_MAX\_NAME\_LEN}. The program calling + this function provides the \VAR{name} memory buffer of at least size + \CONST{SHMEM\_MAX\_NAME\_LEN}. The implementation copies the vendor defined + string of size at most \CONST{SHMEM\_MAX\_NAME\_LEN} to \VAR{name}. In + \CorCpp, the string is terminated by a null character. In \Fortran, + the string of size less than \CONST{SHMEM\_MAX\_NAME\_LEN} is padded with + blank characters up to size \CONST{SHMEM\_MAX\_NAME\_LEN}. If the + \VAR{name} memory buffer is provided with size less than + \CONST{SHMEM\_MAX\_NAME\_LEN}, behavior is undefined. 
For a given library + implementation, the vendor string returned is consistent with the library + constant \CONST{SHMEM\_VENDOR\_STRING}. } \apireturnvalues{ diff --git a/content/shmem_info_get_version.tex b/content/shmem_info_get_version.tex index b4a68e060..3ef298963 100644 --- a/content/shmem_info_get_version.tex +++ b/content/shmem_info_get_version.tex @@ -5,24 +5,24 @@ \begin{apidefinition} \begin{Csynopsis} -void shmem_info_get_version(int *major, int *minor); +void @\FuncDecl{shmem\_info\_get\_version}@(int *major, int *minor); \end{Csynopsis} \begin{Fsynopsis} INTEGER MAJOR, MINOR -SHMEM_INFO_GET_VERSION(MAJOR, MINOR) +CALL @\FuncDecl{SHMEM\_INFO\_GET\_VERSION}@(MAJOR, MINOR) \end{Fsynopsis} \begin{apiarguments} - \apiargument{OUT}{major}{The major version of the \openshmem{} standard in use.} - \apiargument{OUT}{minor}{The minor version of the \openshmem{} standard in use.} + \apiargument{OUT}{major}{The major version of the \openshmem Specification in use.} + \apiargument{OUT}{minor}{The minor version of the \openshmem Specification in use.} \end{apiarguments} \apidescription{ - This routine returns the major and minor version of the \openshmem{} standard + This routine returns the major and minor version of the \openshmem Specification in use. For a given library implementation, the major and minor version - returned by these calls is consistent with the compile-time constants, - SHMEM\_MAJOR\_VERSION and SHMEM\_MINOR\_VERSION, defined in its shmem.h. + returned by these calls are consistent with the library constants + \CONST{SHMEM\_MAJOR\_VERSION} and \CONST{SHMEM\_MINOR\_VERSION}. 
} \apireturnvalues{ diff --git a/content/shmem_init.tex b/content/shmem_init.tex index 065493f90..2bf3b9c70 100644 --- a/content/shmem_init.tex +++ b/content/shmem_init.tex @@ -6,11 +6,11 @@ \begin{apidefinition} \begin{Csynopsis} -void shmem_init(void); +void @\FuncDecl{shmem\_init}@(void); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_INIT() +CALL @\FuncDecl{SHMEM\_INIT}@() \end{Fsynopsis} @@ -24,8 +24,8 @@ other \openshmem routine may be called. At the end of the \openshmem program which it initialized, the call to \FUNC{shmem\_init} must be matched with a call to \FUNC{shmem\_finalize}. After the first call to \FUNC{shmem\_init}, a - subsequent call to \FUNC{shmem\_init} in the same program results in undefined - behavior. + subsequent call to \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread} in the + same program results in undefined behavior. } \apireturnvalues{ @@ -33,23 +33,21 @@ } \apinotes{ - As of \openshmem Specification 1.2 the use of \FUNC{start\_pes} has been - deprecated and is replaced with \FUNC{shmem\_init}. While support for + As of \openshmem[1.2], the use of \FUNC{start\_pes} has been + deprecated and calls to it should be replaced with calls to \FUNC{shmem\_init}. While support for \FUNC{start\_pes} is still required in \openshmem libraries, users are - encouraged to use \FUNC{shmem\_init}. Replacing \FUNC{start\_pes} with - \FUNC{shmem\_init} in \openshmem programs with no further changes is possible; - there is an implicit \FUNC{shmem\_finalize} at the end of main. However, - \FUNC{shmem\_init} differs slightly from \FUNC{start\_pes}: multiple calls to + encouraged to use \FUNC{shmem\_init}. An important difference between + \FUNC{shmem\_init} and \FUNC{start\_pes} is that multiple calls to \FUNC{shmem\_init} within a program results in undefined behavior, while in the case of \FUNC{start\_pes}, any subsequent calls to \FUNC{start\_pes} after the - first one resulted in a no-op. + first one result in a no-op.
} \begin{apiexamples} \apifexample - { This is a simple program that calls \FUNC{shmem\_init}: } - { example_code/shmem_init_example.f90 } +{ The following \FUNC{shmem\_init} example is for \Cstd[11] programs: } + { example_code/shmem_init_example.c } {} \end{apiexamples} diff --git a/content/shmem_init_thread.tex b/content/shmem_init_thread.tex new file mode 100644 index 000000000..8796af396 --- /dev/null +++ b/content/shmem_init_thread.tex @@ -0,0 +1,48 @@ +\apisummary{ +Initializes the \openshmem library, similar to \FUNC{shmem\_init}, and performs any +initialization required for supporting the provided thread level. +} + +\begin{apidefinition} + +\begin{Csynopsis} +int @\FuncDecl{shmem\_init\_thread}@(int requested, int *provided); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{IN}{requested}{The thread level support requested by the user.} +\apiargument{OUT}{provided}{The thread level support provided by the \openshmem implementation.} +\end{apiarguments} + +\apidescription{ +\FUNC{shmem\_init\_thread} initializes the \openshmem library in the same way as +\FUNC{shmem\_init}. In addition, \FUNC{shmem\_init\_thread} also performs +the initialization required for supporting the provided thread level. +The argument \VAR{requested} is used to specify the desired level of +thread support. The argument \VAR{provided} returns the support level +provided by the library. The allowed values for \VAR{provided} and +\VAR{requested} are \CONST{SHMEM\_THREAD\_SINGLE}, \CONST{SHMEM\_THREAD\_FUNNELED}, +\CONST{SHMEM\_THREAD\_SERIALIZED}, and \CONST{SHMEM\_THREAD\_MULTIPLE}. + +An \openshmem program is initialized either by \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread}. +Once an \openshmem library initialization call has been performed, a subsequent +initialization call in the same program results in undefined behavior. 
+If the call to \FUNC{shmem\_init\_thread} +is unsuccessful in allocating and initializing resources for the +\openshmem library, then the behavior of any subsequent call +to the \openshmem library is undefined. +} + +\apireturnvalues{ +\FUNC{shmem\_init\_thread} returns 0 upon success; otherwise, it returns a +non-zero value. +} + +\apinotes{ +The \openshmem library can be initialized either by \FUNC{shmem\_init} +or \FUNC{shmem\_init\_thread}. If the \openshmem library is initialized +by \FUNC{shmem\_init}, the library implementation can choose to +support any one of the defined thread levels. +} + +\end{apidefinition} diff --git a/content/shmem_iput.tex b/content/shmem_iput.tex index c5179ad47..537b5b770 100644 --- a/content/shmem_iput.tex +++ b/content/shmem_iput.tex @@ -5,53 +5,59 @@ \begin{apidefinition} \begin{C11synopsis} -void shmem_iput(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_iput}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_iput}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void shmem__iput(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_iput}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_iput}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
\begin{CsynopsisCol} -void shmem_iput(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_iput\FuncParam{SIZE}}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_iput\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t nelems, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. \begin{Fsynopsis} INTEGER dst, sst, nelems, pe -CALL SHMEM_COMPLEX_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_DOUBLE_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_INTEGER_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT4(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT8(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT32(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT64(dest, source, dst, sst, nelems, pe) -CALL SHMEM_IPUT128(dest, source, dst, sst, nelems, pe) -CALL SHMEM_LOGICAL_IPUT(dest, source, dst, sst, nelems, pe) -CALL SHMEM_REAL_IPUT(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_COMPLEX\_IPUT}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_DOUBLE\_IPUT}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_INTEGER\_IPUT}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IPUT4}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IPUT8}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IPUT32}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IPUT64}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_IPUT128}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_LOGICAL\_IPUT}@(dest, source, dst, sst, nelems, pe) +CALL @\FuncDecl{SHMEM\_REAL\_IPUT}@(dest, source, dst, sst, nelems, pe) \end{Fsynopsis} \begin{apiarguments} + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \apiargument{OUT}{dest}{Array to be updated on the remote \ac{PE}. This data object must be remotely accessible.} \apiargument{IN}{source}{Array containing the data to be copied.} \apiargument{IN}{dst}{The stride between consecutive elements of the \dest{} array. The stride is scaled by the element size of the \dest{} array. A value of \CONST{1} indicates contiguous data. \VAR{dst} must be of type - \textit{ptrdiff\_t}. If you are using \Fortran, it must be a default integer value.} + \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} \apiargument{IN}{sst}{The stride between consecutive elements of the \source{} array. The stride is scaled by the element size of the \source{} array. A value of \CONST{1} indicates contiguous data. \VAR{sst} must be - of type \textit{ptrdiff\_t}. If you are using \Fortran, it must be a + of type \CTYPE{ptrdiff\_t}. When using \Fortran, it must be a default integer value.} \apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are + arrays. \VAR{nelems} must be of type \CTYPE{size\_t} for \Cstd. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be - of type integer.
When using \Fortran, it must be a constant, variable, or array element of default integer type.} \end{apiarguments} @@ -72,7 +78,7 @@ }{Routine}{Data type of \VAR{dest} and \VAR{source}} \apitablerow{shmem\_iput4, shmem\_iput32}{Any noncharacter type that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_iput8}{\Clang: Any noncharacter type that + \apitablerow{shmem\_iput8}{\Cstd: Any noncharacter type that has a storage size equal to \CONST{8} bits.} \apitablerow{}{\Fortran: Any noncharacter type that has a storage size equal to \CONST{64} bits.} @@ -91,7 +97,7 @@ } \apinotes{ - If you are using \Fortran, data types must be of default size. For example, a + When using \Fortran, data types must be of default size. For example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4} or \CONST{REAL(KIND=KIND(1.0))}. See Section \ref{subsec:memory_model} for a definition of the term @@ -101,7 +107,7 @@ \begin{apiexamples} \apicexample - {Consider the following \FUNC{shmem\_iput} example for \CorCpp{} programs.} + {Consider the following \FUNC{shmem\_iput} example for \Cstd[11] programs.} {./example_code/shmem_iput_example.c} {} \end{apiexamples} diff --git a/content/shmem_lock.tex b/content/shmem_lock.tex index 07073645d..5b028669f 100644 --- a/content/shmem_lock.tex +++ b/content/shmem_lock.tex @@ -4,29 +4,29 @@ \begin{apidefinition} \begin{Csynopsis} -void shmem_clear_lock(volatile long *lock); -void shmem_set_lock(volatile long *lock); -int shmem_test_lock(volatile long *lock); +void @\FuncDecl{shmem\_clear\_lock}@(long *lock); +void @\FuncDecl{shmem\_set\_lock}@(long *lock); +int @\FuncDecl{shmem\_test\_lock}@(long *lock); \end{Csynopsis} \begin{Fsynopsis} INTEGER lock, SHMEM_TEST_LOCK -CALL SHMEM_CLEAR_LOCK(lock) -CALL SHMEM_SET_LOCK(lock) -I = SHMEM_TEST_LOCK(lock) +CALL @\FuncDecl{SHMEM\_CLEAR\_LOCK}@(lock) +CALL @\FuncDecl{SHMEM\_SET\_LOCK}@(lock) +I = @\FuncDecl{SHMEM\_TEST\_LOCK}@(lock) \end{Fsynopsis} \begin{apiarguments} 
\apiargument{IN}{lock}{A symmetric data object that is a scalar variable or an array of length \CONST{1}. This data object must be set to \CONST{0} on all - \ac{PE}s prior to the first use. \VAR{lock} must be of type \CONST{long}. - If you are using \Fortran, it must be of default kind.} + \acp{PE} prior to the first use. \VAR{lock} must be of type \CONST{long}. + When using \Fortran, it must be of default kind.} \end{apiarguments} \apidescription{ The \FUNC{shmem\_set\_lock} routine sets a mutual exclusion lock after waiting for the lock to be freed by any other \ac{PE} currently holding the lock. - Waiting \ac{PE}s are assured of getting the lock in a first-come, first-served + Waiting \acp{PE} are assured of getting the lock in a first-come, first-served manner. The \FUNC{shmem\_clear\_lock} routine releases a lock previously set by \FUNC{shmem\_set\_lock} after ensuring that all local and remote stores initiated in the critical region are complete. The \FUNC{shmem\_test\_lock} @@ -34,7 +34,7 @@ this routine, a \ac{PE} can avoid blocking on a set lock. If the lock is currently set, the routine returns without waiting. These routines are appropriate for protecting a critical region from simultaneous update by - multiple \ac{PE}s. + multiple \acp{PE}. } \apireturnvalues{ @@ -54,7 +54,7 @@ \begin{apiexamples} \apicexample - {The following example uses \FUNC{shmem\_lock} in a \Clang{} program.} + {The following example uses \FUNC{shmem\_lock} in a \Cstd[11] program.} {./example_code/shmem_lock_example.c} {} diff --git a/content/shmem_malloc.tex b/content/shmem_malloc.tex index 35e845bfd..3a8fe2d46 100644 --- a/content/shmem_malloc.tex +++ b/content/shmem_malloc.tex @@ -1,14 +1,14 @@ \apisummary{ - Symmetric heap memory management routines. + Collective symmetric heap memory management routines. 
} \begin{apidefinition} \begin{Csynopsis} -void *shmem_malloc(size_t size); -void shmem_free(void *ptr); -void *shmem_realloc(void *ptr, size_t size); -void *shmem_align(size_t alignment, size_t size); +void *@\FuncDecl{shmem\_malloc}@(size_t size); +void @\FuncDecl{shmem\_free}@(void *ptr); +void *@\FuncDecl{shmem\_realloc}@(void *ptr, size_t size); +void *@\FuncDecl{shmem\_align}@(size_t alignment, size_t size); \end{Csynopsis} \begin{apiarguments} @@ -21,43 +21,58 @@ \apidescription{ + The \FUNC{shmem\_malloc}, \FUNC{shmem\_free}, \FUNC{shmem\_realloc}, and + \FUNC{shmem\_align} routines are collective operations that require + participation by all \acp{PE}. + The \FUNC{shmem\_malloc} routine returns a pointer to a block of at least \VAR{size} bytes suitably aligned for any use. This space is allocated from the symmetric heap (in contrast to \FUNC{malloc}, which allocates from the private heap). The \FUNC{shmem\_align} routine allocates a block in the symmetric heap that has - a byte alignment specified by the alignment argument. + a byte alignment specified by the \VAR{alignment} argument. The \FUNC{shmem\_free} routine causes the block to which \VAR{ptr} points to be deallocated, that is, made available for further allocation. If \VAR{ptr} is a - null pointer, no action occurs. + null pointer, no action occurs. The \FUNC{shmem\_realloc} routine changes the size of the block to which \VAR{ptr} points to the size (in bytes) specified by \VAR{size}. The contents of the block are unchanged up to the lesser of the new and old sizes. If the new size is larger, the newly allocated portion of the block is - uninitialized. If \VAR{ptr} is a \CONST{NULL} pointer, the + uninitialized. If \VAR{ptr} is a null pointer, the \FUNC{shmem\_realloc} routine behaves like the \FUNC{shmem\_malloc} routine for the specified size. If \VAR{size} is \CONST{0} and \VAR{ptr} is not a - \CONST{NULL} pointer, the block to which it points is freed. 
If the space cannot + null pointer, the block to which it points is freed. If the space cannot be allocated, the block to which \VAR{ptr} points is unchanged. - The \FUNC{shmem\_malloc}, \FUNC{shmem\_free}, and \FUNC{shmem\_realloc} routines - are provided so that multiple \ac{PE}s in a program can allocate symmetric, + The \FUNC{shmem\_malloc}, \FUNC{shmem\_align}, \FUNC{shmem\_free}, and \FUNC{shmem\_realloc} routines + are provided so that multiple \acp{PE} in a program can allocate symmetric, remotely accessible memory blocks. These memory blocks can then be used with - \openshmem communication routines. Each of these routines call the - \FUNC{shmem\_barrier\_all} routine before returning; this ensures that all - \ac{PE}s participate in the memory allocation, and that the memory on other - \ac{PE}s can be used as soon as the local \ac{PE} returns. The user is + \openshmem communication routines. Each of these routines includes at least one + call to a procedure that is semantically equivalent to \FUNC{shmem\_barrier\_all}: + \FUNC{shmem\_malloc} and \FUNC{shmem\_align} call a + barrier on exit; \FUNC{shmem\_free} calls a barrier on entry; and + \FUNC{shmem\_realloc} may call barriers on both entry and exit, depending on + whether an existing allocation is modified and whether new memory is allocated. + This ensures that all + \acp{PE} participate in the memory allocation, and that the memory on other + \acp{PE} can be used as soon as the local \ac{PE} returns. + The implicit barriers performed by these routines quiet the + default context. It is the user's responsibility to ensure that no + communication operations involving the given memory block are pending on + other contexts prior to calling + the \FUNC{shmem\_free} and \FUNC{shmem\_realloc} routines. 
+ The user is also responsible for calling these routines with identical argument(s) on all - \ac{PE}s; if differing \VAR{size} arguments are used, the behavior of the call - and any subsequent \openshmem calls becomes undefined. + \acp{PE}; if differing \VAR{ptr}, \VAR{size}, or \VAR{alignment} arguments are used, the behavior of the call + and any subsequent \openshmem calls is undefined. } \apireturnvalues{ The \FUNC{shmem\_malloc} routine returns a pointer to the allocated space; - otherwise, it returns a \CONST{NULL} pointer. + otherwise, it returns a null pointer. The \FUNC{shmem\_free} routine returns no value. @@ -65,33 +80,33 @@ (which may have moved); otherwise, it returns a null pointer. The \FUNC{shmem\_align} routine returns an aligned pointer to the allocated - space; otherwise, it returns a \CONST{NULL} pointer. + space; otherwise, it returns a null pointer. } \apinotes{ - As of Specification 1.2 the use of \FUNC{shmalloc}, \FUNC{shmemalign}, - \FUNC{shfree}, and \FUNC{shrealloc} has been deprecated. Although OpenSHMEM - libraries are required to support the calls, program users are encouraged to use + As of \openshmem[1.2] the use of \FUNC{shmalloc}, \FUNC{shmemalign}, + \FUNC{shfree}, and \FUNC{shrealloc} has been deprecated. Although \openshmem + libraries are required to support the calls, users are encouraged to use \FUNC{shmem\_malloc}, \FUNC{shmem\_align}, \FUNC{shmem\_free}, and \FUNC{shmem\_realloc} instead. The behavior and signature of the routines remains unchanged from the deprecated versions. The total size of the symmetric heap is determined at job startup. One can - adjust the size of the heap using the \CONST{SMA\_SYMMETRIC\_SIZE} environment + specify the size of the heap using the \VAR{SHMEM\_SYMMETRIC\_SIZE} environment variable (where available). 
The \FUNC{shmem\_malloc}, \FUNC{shmem\_free}, and \FUNC{shmem\_realloc} routines - differ from the private heap allocation routines in that all \ac{PE}s in a + differ from the private heap allocation routines in that all \acp{PE} in a program must call them (a barrier is used to ensure this). } \apiimpnotes{ The symmetric heap allocation routines always return a pointer to corresponding - symmetric objects across all PEs. The \openshmem{} specification does not + symmetric objects across all \acp{PE}. The \openshmem specification does not require that the virtual addresses are equal across all \acp{PE}. Nevertheless, the implementation must avoid costly address translation operations in the - communication path, including order $N$ (where $N$ is the number of \acp{PE}) - memory translation tables. In order to avoid address translations, the + communication path, including $O(N)$ memory translation tables, + where $N$ is the number of \acp{PE}. In order to avoid address translations, the implementation may re-map the allocated block of memory based on agreed virtual address. Additionally, some operating systems provide an option to disable virtual address randomization, which enables predictable allocation of virtual diff --git a/content/shmem_my_pe.tex b/content/shmem_my_pe.tex index e32bf847a..c1500d461 100644 --- a/content/shmem_my_pe.tex +++ b/content/shmem_my_pe.tex @@ -5,12 +5,12 @@ \begin{apidefinition} \begin{Csynopsis} -int shmem_my_pe(void); +int @\FuncDecl{shmem\_my\_pe}@(void); \end{Csynopsis} \begin{Fsynopsis} INTEGER SHMEM_MY_PE, ME -ME = SHMEM_MY_PE() +ME = @\FuncDecl{SHMEM\_MY\_PE}@() \end{Fsynopsis} \begin{apiarguments} @@ -20,7 +20,7 @@ \apidescription{ This routine returns the \ac{PE} number of the calling \ac{PE}. It accepts no arguments. 
The result is an integer between \CONST{0} and \VAR{npes} - - \CONST{1}, where \VAR{npes} is the total number of \ac{PE}s executing the + \CONST{1}, where \VAR{npes} is the total number of \acp{PE} executing the current program. } @@ -29,21 +29,12 @@ } \apinotes{ - Each \ac{PE} has a unique number or identifier. As of \openshmem Specification - 1.2 the use of \FUNC{\_my\_pe} has been deprecated. Although \openshmem + Each \ac{PE} has a unique number or identifier. As of \openshmem[1.2] + the use of \FUNC{\_my\_pe} has been deprecated. Although \openshmem libraries are required to support the call, users are encouraged to use \FUNC{shmem\_my\_pe} instead. The behavior and signature of the routine \FUNC{shmem\_my\_pe} remains unchanged from the deprecated \FUNC{\_my\_pe} version. } -\begin{apiexamples} - -\apicexample - {The following \FUNC{shmem\_my\_pe} example is for \CorCpp{} programs:} - {./example_code/shmem_mype_example.c} - {} - -\end{apiexamples} - \end{apidefinition} diff --git a/content/shmem_n_pes.tex b/content/shmem_n_pes.tex index 758e152b9..c9fa29a8c 100644 --- a/content/shmem_n_pes.tex +++ b/content/shmem_n_pes.tex @@ -1,16 +1,16 @@ \apisummary{ - Returns the number of \ac{PE}s running in a program. + Returns the number of \acp{PE} running in a program. } \begin{apidefinition} \begin{Csynopsis} -int shmem_n_pes(void); +int @\FuncDecl{shmem\_n\_pes}@(void); \end{Csynopsis} \begin{Fsynopsis} INTEGER SHMEM_N_PES, N_PES -N_PES = SHMEM_N_PES() +N_PES = @\FuncDecl{SHMEM\_N\_PES}@() \end{Fsynopsis} \begin{apiarguments} @@ -18,15 +18,15 @@ \end{apiarguments} \apidescription{ - The routine returns the number of \ac{PE}s running in the program. + The routine returns the number of \acp{PE} running in the program. } \apireturnvalues{ - Integer - Number of \ac{PE}s running in the \openshmem program. + Integer - Number of \acp{PE} running in the \openshmem program. 
} \apinotes{ - As of \openshmem Specification 1.2 the use of \FUNC{\_num\_pes} has been + As of \openshmem[1.2] the use of \FUNC{\_num\_pes} has been deprecated. Although \openshmem libraries are required to support the call, users are encouraged to use \FUNC{shmem\_n\_pes} instead. The behavior and signature of the routine \FUNC{shmem\_n\_pes} remains unchanged from the @@ -36,7 +36,8 @@ \begin{apiexamples} \apicexample - {The following \FUNC{shmem\_n\_pes} example is for \CorCpp{} programs:} + {The following \FUNC{shmem\_my\_pe} and \FUNC{shmem\_n\_pes} example is for + \CorCpp{} programs:} {./example_code/shmem_npes_example.c} {} diff --git a/content/shmem_p.tex b/content/shmem_p.tex index 001b5c97c..dfe6beed0 100644 --- a/content/shmem_p.tex +++ b/content/shmem_p.tex @@ -5,21 +5,26 @@ \begin{apidefinition} \begin{C11synopsis} -void shmem_p(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_p}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_p}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void shmem__p(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_p}@(TYPE *dest, TYPE value, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_p}@(shmem_ctx_t ctx, TYPE *dest, TYPE value, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. \begin{apiarguments} - \apiargument{IN}{addr}{The remotely accessible array element or scalar data object + \apiargument{IN}{ctx}{The context on which to perform the operation. 
+ When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{The remotely accessible array element or scalar data object which will receive the data on the remote \ac{PE}.} - \apiargument{IN}{value}{The value to be transferred to \VAR{addr} on the + \apiargument{IN}{value}{The value to be transferred to \VAR{dest} on the remote \ac{PE}.} - \apiargument{IN}{pe}{The number of the remote \ac{PE}.} + \apiargument{IN}{pe}{The number of the remote \ac{PE}.} \end{apiarguments} \apidescription{ @@ -42,7 +47,7 @@ \begin{apiexamples} \apicexample - {The following example uses \FUNC{shmem\_p} in a \Clang{} program.} + {The following example uses \FUNC{shmem\_p} in a \Cstd[11] program.} {./example_code/shmem_p_example.c} {} diff --git a/content/shmem_pe_accessible.tex b/content/shmem_pe_accessible.tex index 11a3b8b91..8c32fab84 100644 --- a/content/shmem_pe_accessible.tex +++ b/content/shmem_pe_accessible.tex @@ -6,7 +6,7 @@ \begin{apidefinition} \begin{Csynopsis} -int shmem_pe_accessible(int pe); +int @\FuncDecl{shmem\_pe\_accessible}@(int pe); \end{Csynopsis} \begin{Fsynopsis} @@ -21,32 +21,31 @@ \end{apiarguments} \apidescription{ - \FUNC{shmem\_pe\_accessible} is a query routine that indicates whether a + \FUNC{shmem\_pe\_accessible} is a query routine that indicates whether a specified \ac{PE} is accessible via \openshmem from the local \ac{PE}. The - \FUNC{shmem\_pe\_accessible} routine returns \CONST{TRUE} only if the remote - \ac{PE} is a process running from the same executable file as the local - \ac{PE}, indicating that full \openshmem support for symmetric data objects - (that reside in the static memory and symmetric heap) is available, otherwise it - returns \CONST{FALSE}. This routine may be particularly useful for hybrid - programming with other communication libraries (such as a \ac{MPI}) or parallel - languages. 
For example, on SGI Altix series systems, \openshmem is - supported across multiple partitioned hosts and InfiniBand connected hosts. - When running multiple executable MPI programs using \openshmem on an Altix, full - \openshmem support is available between processes running from the same - executable file. However, \openshmem support between processes of different - executable files is supported only for data objects on the symmetric heap, - since static data objects are not symmetric between different executable - files. + \FUNC{shmem\_pe\_accessible} routine returns a value indicating whether the remote + \ac{PE} is a process running from the same executable file as the local + \ac{PE}, thereby indicating whether full support for symmetric data objects, + which may reside in either static memory or the symmetric heap, is available. } \apireturnvalues{ \CorCpp: The return value is 1 if the specified \ac{PE} is a valid remote \ac{PE} - for \openshmem routines; otherwise, it is 0. \\ \\ + for \openshmem routines; otherwise, it is 0. \Fortran: The return value is \CONST{.TRUE.} if the specified \ac{PE} is a valid - remote \ac{PE} for \openshmem routines; otherwise, it is \CONST{.FALSE.}. + remote \ac{PE} for \openshmem routines; otherwise, it is \CONST{.FALSE.}. } -\apinotes{ None. } +\apinotes{ + This routine may be particularly useful for hybrid programming with other + communication libraries (such as \ac{MPI}) or parallel languages. For + example, when an \ac{MPI} job uses \ac{MPMD} mode, multiple executable + \ac{MPI} programs are executed as part of the same \ac{MPI} job. In such cases, + \openshmem support may only be available between processes running from the + same executable file. In addition, some environments may allow a hybrid + job to span multiple network partitions. In such scenarios, \openshmem + support may only be available between \acp{PE} within the same partition. 
+} \end{apidefinition} diff --git a/content/shmem_ptr.tex b/content/shmem_ptr.tex index b9f8d2679..98d8c592a 100644 --- a/content/shmem_ptr.tex +++ b/content/shmem_ptr.tex @@ -1,24 +1,24 @@ \apisummary{ - Returns a pointer to a data object on a specified \ac{PE}. + Returns a local pointer to a symmetric data object on the specified \ac{PE}. } \begin{apidefinition} \begin{Csynopsis} -void *shmem_ptr(const void *dest, int pe); +void *@\FuncDecl{shmem\_ptr}@(const void *dest, int pe); \end{Csynopsis} \begin{Fsynopsis} POINTER (PTR, POINTEE) INTEGER pe -PTR = SHMEM_PTR(dest, pe) +PTR = @\FuncDecl{SHMEM\_PTR}@(dest, pe) \end{Fsynopsis} \begin{apiarguments} \apiargument{IN}{dest}{The symmetric data object to be referenced.} \apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which \dest{} is to - be accessed. If you are using \Fortran, it must be a default + be accessed. When using \Fortran, it must be a default integer value.} \end{apiarguments} @@ -27,17 +27,16 @@ \dest{} on the specified \ac{PE}. This address can be assigned to a pointer. After that, ordinary loads and stores to this remote address may be performed. - When a sequence of loads (gets) and stores (puts) to a data object on a - remote \ac{PE} does not match the access pattern provided in an \openshmem data - transfer routine like \FUNC{shmem\_put32} or \FUNC{shmem\_real\_iget}, the - \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish the - communication. + The \FUNC{shmem\_ptr} routine can provide an efficient means to accomplish + communication, for example when a sequence of reads and writes to a data + object on a remote \ac{PE} does not match the access pattern provided in an + \openshmem data transfer routine like \FUNC{shmem\_put} or + \FUNC{shmem\_iget}. } \apireturnvalues{ - The return value is a non-NULL address of the \dest{} data object when it is - accessible using memory loads and stores in addition to \openshmem{} operations. 
- Otherwise, a NULL address is returned. + The address of the \dest{} data object is returned when it is accessible + using memory loads and stores. Otherwise, a null pointer is returned. } \apinotes{ @@ -48,13 +47,13 @@ \begin{apiexamples} \apifexample - { This \Fortran{} program calls \FUNC{shmem\_ptr} and then \ac{PE} 0 writes to + { This \Fortran program calls \FUNC{shmem\_ptr} and then \ac{PE} 0 writes to the \VAR{BIGD} array on \ac{PE} 1: } {./example_code/shmem_ptr_example.f90 } {} \apicexample - {This is the equivalent program written in \Clang:} + {This is the equivalent program written in \Cstd[11]:} {./example_code/shmem_ptr_example.c} {} diff --git a/content/shmem_put.tex b/content/shmem_put.tex index a9c4b9293..c21a0cd9e 100644 --- a/content/shmem_put.tex +++ b/content/shmem_put.tex @@ -6,49 +6,56 @@ \begin{apidefinition} \begin{C11synopsis} -void shmem_put(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_put}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_put}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void shmem__put(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
\begin{CsynopsisCol} -void shmem_put(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. \begin{CsynopsisCol} -void shmem_putmem(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_putmem}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_putmem}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} \begin{Fsynopsis} -CALL SHMEM_CHARACTER_PUT(dest, source, nelems, pe) -CALL SHMEM_COMPLEX_PUT(dest, source, nelems, pe) -CALL SHMEM_DOUBLE_PUT(dest, source, nelems, pe) -CALL SHMEM_INTEGER_PUT(dest, source, nelems, pe) -CALL SHMEM_LOGICAL_PUT(dest, source, nelems, pe) -CALL SHMEM_PUT4(dest, source, nelems, pe) -CALL SHMEM_PUT8(dest, source, nelems, pe) -CALL SHMEM_PUT32(dest, source, nelems, pe) -CALL SHMEM_PUT64(dest, source, nelems, pe) -CALL SHMEM_PUT128(dest, source, nelems, pe) -CALL SHMEM_PUTMEM(dest, source, nelems, pe) -CALL SHMEM_REAL_PUT(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_CHARACTER\_PUT}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_COMPLEX\_PUT}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_DOUBLE\_PUT}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_INTEGER\_PUT}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_LOGICAL\_PUT}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT4}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT8}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT32}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT64}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT128}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUTMEM}@(dest, source, nelems, pe) +CALL 
@\FuncDecl{SHMEM\_REAL\_PUT}@(dest, source, nelems, pe) \end{Fsynopsis} \begin{apiarguments} - \apiargument{IN}{dest}{Data object to be updated on the remote \ac{PE}. This + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} - \apiargument{OUT}{source}{Data object containing the data to be copied.} + \apiargument{IN}{source}{Data object containing the data to be copied.} \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are using + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be - of type integer. If you are using \Fortran, it must be a constant, variable, + of type integer. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \end{apiarguments} @@ -64,11 +71,11 @@ The \dest{} and \source{} data objects must conform to certain typing constraints, which are as follows:} {Routine}{Data type of \VAR{dest} and \VAR{source}} - \apitablerow{shmem\_putmem}{\Fortran: Any noncharacter type. \Clang: Any + \apitablerow{shmem\_putmem}{\Fortran: Any noncharacter type. \Cstd: Any data type. nelems is scaled in bytes.} \apitablerow{shmem\_put4, shmem\_put32}{Any noncharacter type that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_put8}{\Clang: Any noncharacter type that + \apitablerow{shmem\_put8}{\Cstd: Any noncharacter type that has a storage size equal to \CONST{8} bits.} \apitablerow{}{\Fortran: Any noncharacter type that has a storage size equal to \CONST{64} bits.} @@ -89,9 +96,10 @@ None. 
} \apinotes{ - If you are using \Fortran, data types must be of default size. For example, + When using \Fortran, data types must be of default size. For example, a real variable must be declared as \CONST{REAL}, \CONST{REAL*4}, or - \CONST{REAL(KIND=KIND(1.0))}. The Fortran API routine \FUNC{SHMEM\_PUT} has + \CONST{REAL(KIND=KIND(1.0))}. + As of \openshmem[1.2], the \Fortran API routine \FUNC{SHMEM\_PUT} has been deprecated, and either \FUNC{SHMEM\_PUT8} or \FUNC{SHMEM\_PUT64} should be used in its place. } @@ -99,7 +107,7 @@ \begin{apiexamples} \apicexample - { The following \FUNC{shmem\_put} example is for \CorCpp{} programs:} + { The following \FUNC{shmem\_put} example is for \Cstd[11] programs:} {./example_code/shmem_put_example.c} {} \end{apiexamples} diff --git a/content/shmem_put_nbi.tex b/content/shmem_put_nbi.tex index 4d010ca0a..dd9ac0413 100644 --- a/content/shmem_put_nbi.tex +++ b/content/shmem_put_nbi.tex @@ -6,49 +6,56 @@ \begin{apidefinition} \begin{C11synopsis} -void shmem_put_nbi(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_put\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_put\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{C11synopsis} where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}. \begin{Csynopsis} -void shmem__put_nbi(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_put\_nbi}@(TYPE *dest, const TYPE *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_put\_nbi}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, size_t nelems, int pe); \end{Csynopsis} where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}. 
\begin{CsynopsisCol} -void shmem_put_nbi(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_put\FuncParam{SIZE}\_nbi}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_put\FuncParam{SIZE}\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}. \begin{CsynopsisCol} -void shmem_putmem_nbi(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_putmem\_nbi}@(void *dest, const void *source, size_t nelems, int pe); +void @\FuncDecl{shmem\_ctx\_putmem\_nbi}@(shmem_ctx_t ctx, void *dest, const void *source, size_t nelems, int pe); \end{CsynopsisCol} \begin{Fsynopsis} -CALL SHMEM_CHARACTER_PUT_NBI(dest, source, nelems, pe) -CALL SHMEM_COMPLEX_PUT_NBI(dest, source, nelems, pe) -CALL SHMEM_DOUBLE_PUT_NBI(dest, source, nelems, pe) -CALL SHMEM_INTEGER_PUT_NBI(dest, source, nelems, pe) -CALL SHMEM_LOGICAL_PUT_NBI(dest, source, nelems, pe) -CALL SHMEM_PUT4_NBI(dest, source, nelems, pe) -CALL SHMEM_PUT8_NBI(dest, source, nelems, pe) -CALL SHMEM_PUT32_NBI(dest, source, nelems, pe) -CALL SHMEM_PUT64_NBI(dest, source, nelems, pe) -CALL SHMEM_PUT128_NBI(dest, source, nelems, pe) -CALL SHMEM_PUTMEM_NBI(dest, source, nelems, pe) -CALL SHMEM_REAL_PUT_NBI(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_CHARACTER\_PUT\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_COMPLEX\_PUT\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_DOUBLE\_PUT\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_INTEGER\_PUT\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_LOGICAL\_PUT\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT4\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT8\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT32\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUT64\_NBI}@(dest, source, nelems, pe) +CALL 
@\FuncDecl{SHMEM\_PUT128\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_PUTMEM\_NBI}@(dest, source, nelems, pe) +CALL @\FuncDecl{SHMEM\_REAL\_PUT\_NBI}@(dest, source, nelems, pe) \end{Fsynopsis} \begin{apiarguments} - \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} + \apiargument{OUT}{dest}{Data object to be updated on the remote \ac{PE}. This data object must be remotely accessible.} - \apiargument{IN}{source}{Data object containing the data to be copied.} - \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} - arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Clang. If you are using + \apiargument{IN}{source}{Data object containing the data to be copied.} + \apiargument{IN}{nelems}{Number of elements in the \VAR{dest} and \VAR{source} + arrays. \VAR{nelems} must be of type \VAR{size\_t} for \Cstd. When using \Fortran, it must be a constant, variable, or array element of default integer type.} - \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be - of type integer. If you are using \Fortran, it must be a constant, variable, + \apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}. \VAR{pe} must be + of type integer. When using \Fortran, it must be a constant, variable, or array element of default integer type.} \end{apiarguments} @@ -68,11 +75,11 @@ The \dest{} and \source{} data objects must conform to certain typing constraints, which are as follows:} {Routine}{Data type of \VAR{dest} and \VAR{source}} - \apitablerow{shmem\_putmem\_nbi}{\Fortran: Any noncharacter type. \Clang: + \apitablerow{shmem\_putmem\_nbi}{\Fortran: Any noncharacter type. \Cstd: Any data type. 
nelems is scaled in bytes.} \apitablerow{shmem\_put4\_nbi, shmem\_put32\_nbi}{Any noncharacter type that has a storage size equal to \CONST{32} bits.} - \apitablerow{shmem\_put8\_nbi}{\Clang: Any noncharacter type that + \apitablerow{shmem\_put8\_nbi}{\Cstd: Any noncharacter type that has a storage size equal to \CONST{8} bits.} \apitablerow{}{\Fortran: Any noncharacter type that has a storage size equal to \CONST{64} bits.} diff --git a/content/shmem_query_thread.tex b/content/shmem_query_thread.tex new file mode 100644 index 000000000..37b218cd4 --- /dev/null +++ b/content/shmem_query_thread.tex @@ -0,0 +1,31 @@ +\apisummary{ +Returns the level of thread support provided by the library. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_query\_thread}@(int *provided); +\end{Csynopsis} + +\begin{apiarguments} +\apiargument{OUT}{provided}{The thread level support provided by the \openshmem implementation.} +\end{apiarguments} + +\apidescription{ +The \FUNC{shmem\_query\_thread} call returns the level of thread support +currently being provided. The value returned will be the same as was returned in \VAR{provided} +by a call to \FUNC{shmem\_init\_thread}, if the \openshmem library was +initialized by \FUNC{shmem\_init\_thread}. If the library was initialized by +\FUNC{shmem\_init}, the implementation can choose to provide any one of the defined +thread levels, and \FUNC{shmem\_query\_thread} returns this thread level. +} + +\apireturnvalues{ +None. +} + +\apinotes{ +None. 
+} +\end{apidefinition} diff --git a/content/shmem_quiet.tex b/content/shmem_quiet.tex index 83df95543..35791b47a 100644 --- a/content/shmem_quiet.tex +++ b/content/shmem_quiet.tex @@ -1,28 +1,31 @@ \apisummary{ - Waits for completion of all outstanding \PUT{}, \acp{AMO}, memory store, - and non-blocking \PUT{} and \GET{} routines to symmetric data + Waits for completion of all outstanding \PUT{}, \ac{AMO}, memory store, + and nonblocking \PUT{} and \GET{} routines to symmetric data objects issued by a \ac{PE}. } \begin{apidefinition} \begin{Csynopsis} -void shmem_quiet(void); +void @\FuncDecl{shmem\_quiet}@(void); +void @\FuncDecl{shmem\_ctx\_quiet}@(shmem_ctx_t ctx); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_QUIET +CALL @\FuncDecl{SHMEM\_QUIET}@ \end{Fsynopsis} \begin{apiarguments} - \apiargument{None.}{}{} + \apiargument{IN}{ctx}{The context on which to perform the operation. + When this argument is not provided, the operation is performed on + \CONST{SHMEM\_CTX\_DEFAULT}.} \end{apiarguments} \apidescription{ - The \FUNC{shmem\_quiet} routine ensures completion of \PUT{}, \acp{AMO}, - memory store, and non-blocking \PUT{} and \GET{} routines on - symmetric data objects issued by the calling \ac{PE}. All \PUT{}, \acp{AMO}, - memory store, and non-blocking \PUT{} and \GET{} routines to + The \FUNC{shmem\_quiet} routine ensures completion of \PUT{}, \ac{AMO}, + memory store, and nonblocking \PUT{} and \GET{} routines on + symmetric data objects issued by the calling \ac{PE} on the given context. All \PUT{}, \ac{AMO}, + memory store, and nonblocking \PUT{} and \GET{} routines to symmetric data objects are guaranteed to be completed and visible to all \acp{PE} when \FUNC{shmem\_quiet} returns. 
} @@ -34,22 +37,47 @@ \apinotes{ \FUNC{shmem\_quiet} is most useful as a way of ensuring completion of - several \PUT{}, \acp{AMO}, memory store, and non-blocking \PUT{} + several \PUT{}, \ac{AMO}, memory store, and nonblocking \PUT{} and \GET{} routines to symmetric data objects initiated by the calling - \ac{PE}. For example, you might use \FUNC{shmem\_quiet} to await delivery - of a block of data before issuing another \PUT{} or non-blocking + \ac{PE}. For example, one might use \FUNC{shmem\_quiet} to await delivery + of a block of data before issuing another \PUT{} or nonblocking \PUT{} routine, which sets a completion flag on another \ac{PE}. \FUNC{shmem\_quiet} is not usually needed if \FUNC{shmem\_barrier\_all} or \FUNC{shmem\_barrier} are called. The barrier routines wait for the completion of outstanding writes (\PUT{}, \ac{AMO}, memory stores, and nonblocking \PUT{} and \GET{} routines) to symmetric data objects on all \acp{PE}. + + In an \openshmem program with multithreaded \acp{PE}, it is the + user's responsibility to ensure ordering between operations issued by the threads + in a \ac{PE} that target symmetric memory (e.g., \PUT{}, \ac{AMO}, memory stores, + and nonblocking routines) and calls by threads in that \ac{PE} to + \FUNC{shmem\_quiet}. The \FUNC{shmem\_quiet} routine can enforce memory store ordering only for the + calling thread. Thus, to ensure ordering for memory stores performed by a thread that is + not the thread calling \FUNC{shmem\_quiet}, the update must be made visible to the + calling thread according to the rules of the memory model associated with + the threading environment. + + A call to \FUNC{shmem\_quiet} by a thread completes the operations posted prior + to calling \FUNC{shmem\_quiet}. If the user intends to also complete operations + issued by a thread that is not the thread calling \FUNC{shmem\_quiet}, the + user must ensure that the operations are performed prior to the call to + \FUNC{shmem\_quiet}. 
This may require the use of a synchronization + operation provided by the threading package. For example, when using POSIX + Threads, the user may call the \FUNC{pthread\_barrier\_wait} routine to + ensure that all threads have issued operations before a thread calls + \FUNC{shmem\_quiet}. + + \FUNC{shmem\_quiet} does not have an effect on the ordering between memory + accesses issued by the target \ac{PE}. The \FUNC{shmem\_wait\_until}, + \FUNC{shmem\_test}, \FUNC{shmem\_barrier}, and \FUNC{shmem\_barrier\_all} routines + can be called by the target \ac{PE} to guarantee ordering of its memory accesses. } \begin{apiexamples} \apicexample - {The following example uses \FUNC{shmem\_quiet} in a \CorCpp{} program: } + {The following example uses \FUNC{shmem\_quiet} in a \Cstd[11] program: } {./example_code/shmem_quiet_example.c} {\VAR{Put1} and \VAR{put2} will be completed and visible before \VAR{put3} and \VAR{put4}.} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index fe05567ef..11ee26923 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -1,218 +1,227 @@ \apisummary{ - Performs arithmetic and logical operations across a set of \ac{PE}s. + The following functions perform reduction operations across all + \acp{PE} in a set of \acp{PE}. 
} \begin{apidefinition} -\textbf{AND} \newline -Performs a bitwise AND function across a set of processing elements (\ac{PE}s).\newline +\paragraph{AND} +Performs a bitwise AND reduction across a set of \acp{PE}.\newline \begin{Csynopsis} -void shmem_int_and_to_all(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_and_to_all(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longlong_and_to_all(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_and_to_all(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_short\_and\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_int\_and\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); +void @\FuncDecl{shmem\_long\_and\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); +void @\FuncDecl{shmem\_longlong\_and\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_INT4_AND_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_AND_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT4\_AND\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_AND\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} -\bigskip -\textbf{MAX} 
\newline -Performs a maximum function reduction across a set of processing elements (\ac{PE}s).\newline +\paragraph{MAX} +Performs a maximum-value reduction across a set of \acp{PE}.\newline \begin{Csynopsis} -void shmem_double_max_to_all(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_max_to_all(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_max_to_all(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_max_to_all(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longdouble_max_to_all(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_max_to_all(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_max_to_all(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_short\_max\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_int\_max\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); +void @\FuncDecl{shmem\_double\_max\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); +void @\FuncDecl{shmem\_float\_max\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); +void @\FuncDecl{shmem\_long\_max\_to\_all}@(long *dest, const long 
*source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); +void @\FuncDecl{shmem\_longdouble\_max\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); +void @\FuncDecl{shmem\_longlong\_max\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_INT4_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_MAX_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT4\_MAX\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_MAX\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL4\_MAX\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL8\_MAX\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL16\_MAX\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} -\bigskip -\textbf{MIN} \newline -Performs a minimum function reduction across a set of processing elements (\ac{PE}s).\newline +\paragraph{MIN} +Performs a minimum-value reduction across a set of \acp{PE}.\newline \begin{Csynopsis} -void shmem_double_min_to_all(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_min_to_all(float 
*dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_min_to_all(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_min_to_all(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longdouble_min_to_all(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_min_to_all(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_min_to_all(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_short\_min\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_int\_min\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); +void @\FuncDecl{shmem\_double\_min\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); +void @\FuncDecl{shmem\_float\_min\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); +void @\FuncDecl{shmem\_long\_min\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); +void @\FuncDecl{shmem\_longdouble\_min\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); +void @\FuncDecl{shmem\_longlong\_min\_to\_all}@(long long *dest, const long long *source, int nreduce, int 
PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_INT4_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_MIN_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT4\_MIN\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_MIN\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL4\_MIN\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL8\_MIN\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL16\_MIN\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} -\bigskip -\textbf{SUM} \newline -Performs a sum reduction across a set of processing elements (\ac{PE}s).\newline +\paragraph{SUM} +Performs a sum reduction across a set of \acp{PE}.\newline \begin{Csynopsis} -void shmem_complexd_sum_to_all(double complex *dest, const double complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double complex *pWrk, long |\mbox{*pSync);}| -void shmem_complexf_sum_to_all(float complex *dest, const float complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float complex *pWrk, long *pSync); -void shmem_double_sum_to_all(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_sum_to_all(float *dest, const float *source, int nreduce, int PE_start, 
int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_sum_to_all(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_sum_to_all(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride,int PE_size, long *pWrk, long *pSync); -void shmem_longdouble_sum_to_all(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_sum_to_all(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_sum_to_all(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_complexd\_sum\_to\_all}@(double _Complex *dest, const double _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double _Complex *pWrk, long *pSync); +void @\FuncDecl{shmem\_complexf\_sum\_to\_all}@(float _Complex *dest, const float _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float _Complex *pWrk, long *pSync); +void @\FuncDecl{shmem\_short\_sum\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_int\_sum\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); +void @\FuncDecl{shmem\_double\_sum\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); +void @\FuncDecl{shmem\_float\_sum\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); +void @\FuncDecl{shmem\_long\_sum\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride,int 
PE_size, long *pWrk, long *pSync); +void @\FuncDecl{shmem\_longdouble\_sum\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); +void @\FuncDecl{shmem\_longlong\_sum\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_COMP4_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_COMP8_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT4_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_SUM_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_COMP4\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_COMP8\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT4\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL4\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL8\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL16\_SUM\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} -\bigskip -\textbf{PROD} \newline -Performs a product 
reduction across a set of processing elements (\ac{PE}s).\newline +\paragraph{PROD} +Performs a product reduction across a set of \acp{PE}.\newline \begin{Csynopsis} -void shmem_complexd_prod_to_all(double complex *dest, const double complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double complex *pWrk, long |\mbox{*pSync);}| -void shmem_complexf_prod_to_all(float complex *dest, const float complex *source, int |\mbox{nreduce,}| int PE_start, int logPE_stride, int PE_size, float complex *pWrk, long *pSync); -void shmem_double_prod_to_all(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); -void shmem_float_prod_to_all(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); -void shmem_int_prod_to_all(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_prod_to_all(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longdouble_prod_to_all(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); -void shmem_longlong_prod_to_all(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_prod_to_all(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_complexd\_prod\_to\_all}@(double _Complex *dest, const double _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double _Complex *pWrk, long *pSync); +void @\FuncDecl{shmem\_complexf\_prod\_to\_all}@(float _Complex *dest, const float _Complex *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float _Complex *pWrk, long 
*pSync); +void @\FuncDecl{shmem\_short\_prod\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_int\_prod\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); +void @\FuncDecl{shmem\_double\_prod\_to\_all}@(double *dest, const double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, double *pWrk, long *pSync); +void @\FuncDecl{shmem\_float\_prod\_to\_all}@(float *dest, const float *source, int nreduce, int PE_start, int logPE_stride, int PE_size, float *pWrk, long *pSync); +void @\FuncDecl{shmem\_long\_prod\_to\_all}@(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); +void @\FuncDecl{shmem\_longdouble\_prod\_to\_all}@(long double *dest, const long double *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long double *pWrk, long *pSync); +void @\FuncDecl{shmem\_longlong\_prod\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_COMP4_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_COMP8_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT4_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL4_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL8_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_REAL16_PROD_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_COMP4\_PROD\_TO\_ALL}@(dest, source, nreduce, 
PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_COMP8\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT4\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL4\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL8\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_REAL16\_PROD\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} -\bigskip -\textbf{OR} \newline -Performs a bitwise OR function reduction across a set of processing elements (\ac{PE}s).\newline +\paragraph{OR} +Performs a bitwise OR reduction across a set of \acp{PE}.\newline \begin{Csynopsis} -void shmem_int_or_to_all(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_or_to_all(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longlong_or_to_all(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_or_to_all(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_short\_or\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_int\_or\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); +void @\FuncDecl{shmem\_long\_or\_to\_all}@(long *dest, const long *source, int nreduce, int 
PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); +void @\FuncDecl{shmem\_longlong\_or\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_INT4_OR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, |\mbox{pSync)}| -CALL SHMEM_INT8_OR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, |\mbox{pSync)}| +CALL @\FuncDecl{SHMEM\_INT4\_OR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_OR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} -\bigskip -\textbf{XOR}\newline -Performs a bitwise EXCLUSIVE OR reduction across a set of processing elements (\ac{PE}s).\newline +\paragraph{XOR} +Performs a bitwise exclusive OR (XOR) reduction across a set of \acp{PE}.\newline \begin{Csynopsis} -void shmem_int_xor_to_all(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); -void shmem_long_xor_to_all(long *dest, const long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); -void shmem_longlong_xor_to_all(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); -void shmem_short_xor_to_all(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_short\_xor\_to\_all}@(short *dest, const short *source, int nreduce, int PE_start, int logPE_stride, int PE_size, short *pWrk, long *pSync); +void @\FuncDecl{shmem\_int\_xor\_to\_all}@(int *dest, const int *source, int nreduce, int PE_start, int logPE_stride, int PE_size, int *pWrk, long *pSync); +void @\FuncDecl{shmem\_long\_xor\_to\_all}@(long *dest, const long *source, int nreduce, int 
PE_start, int logPE_stride, int PE_size, long *pWrk, long *pSync); +void @\FuncDecl{shmem\_longlong\_xor\_to\_all}@(long long *dest, const long long *source, int nreduce, int PE_start, int logPE_stride, int PE_size, long long *pWrk, long *pSync); \end{Csynopsis} \begin{Fsynopsis} -CALL SHMEM_INT4_XOR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) -CALL SHMEM_INT8_XOR_TO_ALL(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT4\_XOR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) +CALL @\FuncDecl{SHMEM\_INT8\_XOR\_TO\_ALL}@(dest, source, nreduce, PE_start, logPE_stride, PE_size, pWrk, pSync) \end{Fsynopsis} \begin{apiarguments} -\apiargument{IN}{dest}{A symmetric array, of length \VAR{nreduce} elements, to +\apiargument{OUT}{dest}{A symmetric array, of length \VAR{nreduce} elements, to receive the result of the reduction routines. The data type of \dest{} varies with the version of the reduction routine being called. When calling from \CorCpp, refer to the SYNOPSIS section for data type information.} \apiargument{IN}{source}{ A symmetric array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The \source{} argument must have the same data type as \dest.} -\apiargument{IN}{\VAR{nreduce}}{The number of elements in the \dest{} and \source{} - arrays. \VAR{nreduce} must be of type integer. If you are using \Fortran, it +\apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} + arrays. \VAR{nreduce} must be of type integer. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the \activeset{} of - \ac{PE}s. \VAR{PE\_start} must be of type integer. If you are using \Fortran, +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of + \acp{PE}. \VAR{PE\_start} must be of type integer. 
When using \Fortran, it must be a default integer value.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive - \ac{PE} numbers in the \activeset. \VAR{logPE\_stride} must be of type integer. - If you are using \Fortran, it must be a default integer value.} -\apiargument{IN}{PE\_size}{The number of \ac{PE}s in the \activeset. - \VAR{PE\_size} must be of type integer. If you are using \Fortran, it must be a + \ac{PE} numbers in the active set. \VAR{logPE\_stride} must be of type integer. + When using \Fortran, it must be a default integer value.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set. + \VAR{PE\_size} must be of type integer. When using \Fortran, it must be a default integer value.} -\apiargument{IN}{pWrk}{A symmetric work array. The \VAR{pWrk} argument must have the - same data type as \dest. In \CorCpp, this contains max(\VAR{nreduce}/2 + 1, - \CONST{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}) elements. In \Fortran, this - contains max(\VAR{nreduce}/2 + 1, \CONST{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}) +\apiargument{IN}{pWrk}{ + A symmetric work array of size at least + max(\VAR{nreduce}/2 + 1, \CONST{SHMEM\_REDUCE\_MIN\_WRKDATA\_SIZE}) elements.} -\apiargument{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must be of - type long and size \CONST{SHMEM\_REDUCE\_SYNC\_SIZE}. In \Fortran, \VAR{pSync} - must be of type integer and size \CONST{SHMEM\_REDUCE\_SYNC\_SIZE}. If you are - using \Fortran, it must be a default integer value. Every element of this array - must be initialized with the value \CONST{SHMEM\_SYNC\_VALUE} (in \CorCpp) or - \CONST{SHMEM\_SYNC\_VALUE} (in \Fortran) before any of the \ac{PE}s in the - \activeset{} enter the reduction routine.} - +\apiargument{IN}{pSync}{ + A symmetric work array of size \CONST{SHMEM\_REDUCE\_SYNC\_SIZE}. + In \CorCpp, \VAR{pSync} must be an array of elements of type \CTYPE{long}. 
+ In \Fortran, \VAR{pSync} must be an array of elements of default integer type. + Every element of this array must be initialized with the value + \CONST{SHMEM\_SYNC\_VALUE} before any of the \acp{PE} in the active set + enter the reduction routine.} + \end{apiarguments} \apidescription{ \openshmem reduction routines compute one or more reductions across symmetric arrays on multiple \acp{PE}. A reduction performs an associative binary routine - across a set of values. - + across a set of values. + The \VAR{nreduce} argument determines the number of separate reductions to - perform. The \source{} array on all \ac{PE}s in the \activeset{} provides one + perform. The \source{} array on all \acp{PE} in the active set provides one element for each reduction. The results of the reductions are placed in the - \dest{} array on all \ac{PE}s in the \activeset. The \activeset{} is defined + \dest{} array on all \acp{PE} in the active set. The active set is defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. - + The \source{} and \dest{} arrays may be the same array, but they may not be overlapping arrays. - - As with all \openshmem{} collective routines, each of these routines assumes - that only \ac{PE}s in the \activeset{} call the routine. If a \ac{PE} not in - the \activeset{} calls an \openshmem collective routine, undefined behavior - results. - + + As with all \openshmem collective routines, each of these routines assumes + that only \acp{PE} in the active set call the routine. If a \ac{PE} not in + the active set calls an \openshmem collective routine, the behavior is undefined. + The values of arguments \VAR{nreduce}, \VAR{PE\_start}, \VAR{logPE\_stride}, and - \VAR{PE\_size} must be equal on all \ac{PE}s in the \activeset. The same \dest{} + \VAR{PE\_size} must be equal on all \acp{PE} in the active set. 
The same \dest{} and \source{} arrays, and the same \VAR{pWrk} and \VAR{pSync} work arrays, must - be passed to all \ac{PE}s in the \activeset. - - Before any \ac{PE} calls a reduction routine, you must ensure that the - following conditions exist (synchronization via a \OPR{barrier} or some other - method is often needed to ensure this): The \VAR{pWrk} and \VAR{pSync} arrays - on all \ac{PE}s in the \activeset{} are not still in use from a prior call to a - collective \openshmem{} routine. The \dest{} array on all \ac{PE}s in the - \activeset{} is ready to accept the results of the \OPR{reduction}. - + be passed to all \acp{PE} in the active set. + + Before any \ac{PE} calls a reduction routine, + the following conditions must be ensured: + \begin{itemize} + \item The \VAR{pWrk} and \VAR{pSync} arrays on all \acp{PE} in the + active set are not still in use from a prior call to a collective + \openshmem routine. + \item The \dest{} array on all \acp{PE} in the active set is ready + to accept the results of the \OPR{reduction}. + \end{itemize} + Otherwise, the behavior is undefined. + Upon return from a reduction routine, the following are true for the local - \ac{PE}: The \dest{} array is updated. The values in the \VAR{pSync} array are + \ac{PE}: The \dest{} array is updated and the \source{} array may be safely reused. + The values in the \VAR{pSync} array are restored to the original values. + + + The complex-typed interfaces are only provided for sum and product reductions. + When the \Cstd translation environment does not support complex types + \footnote{That is, under \Cstd language standards prior to \Cstd[99] or under \Cstd[11] + when \CONST{\_\_STDC\_NO\_COMPLEX\_\_} is defined to 1}, an \openshmem + implementation is not required to provide support for these + complex-typed interfaces. 
} -\apidesctable{ + + +%\deprecationstart +\apidesctable{ When calling from \Fortran, the \dest{} data types are as follows: }{Routine}{Data type} \apitablerow{shmem\_int8\_and\_to\_all}{Integer, with an element size of 8 bytes.} @@ -234,8 +243,8 @@ \apitablerow{shmem\_real4\_sum\_to\_all}{Real, with an element size of 4 bytes.} \apitablerow{shmem\_real8\_sum\_to\_all}{Real, with an element size of 8 bytes.} \apitablerow{shmem\_real16\_sum\_to\_all}{Real, with an element size of 16 bytes.} - \apitablerow{shmem\_comp4\_prod\_to\_all}{ Complex, with an element size equal to two 4-byte real values. } - \apitablerow{shmem\_comp8\_prod\_to\_all}{ Complex, with an element size equal to two 8-byte real values.} + \apitablerow{shmem\_comp4\_prod\_to\_all}{Complex, with an element size equal to two 4-byte real values.} + \apitablerow{shmem\_comp8\_prod\_to\_all}{Complex, with an element size equal to two 8-byte real values.} \apitablerow{shmem\_int4\_prod\_to\_all}{Integer, with an element size of 4 bytes.} \apitablerow{shmem\_int8\_prod\_to\_all}{Integer, with an element size of 8 bytes.} \apitablerow{shmem\_real4\_prod\_to\_all}{Real, with an element size of 4 bytes.} @@ -246,73 +255,76 @@ \apitablerow{shmem\_int8\_xor\_to\_all}{Integer, with an element size of 8 bytes.} \apitablerow{shmem\_int4\_xor\_to\_all}{Integer, with an element size of 4 bytes.} +%\deprecationend + + \apireturnvalues{ None. } -\apinotes{ - All \openshmem{} reduction routines reset the values in \VAR{pSync} before they +\apinotes{ + All \openshmem reduction routines reset the values in \VAR{pSync} before they return, so a particular \VAR{pSync} buffer need only be initialized the first - time it is used. You must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} - in the \activeset{} while any of the \ac{PE}s participate in processing of an - \openshmem{} reduction routine. Be careful to avoid the following situations: If + time it is used.
The user must ensure that the \VAR{pSync} array is not being updated on any \ac{PE} + in the active set while any of the \acp{PE} participate in processing of an + \openshmem reduction routine. Be careful to avoid the following situations: If the \VAR{pSync} array is initialized at run time, some type of synchronization - is needed to ensure that all \ac{PE}s in the working set have initialized + is needed to ensure that all \acp{PE} in the active set have initialized \VAR{pSync} before any of them enter an \openshmem routine called with the \VAR{pSync} synchronization array. A \VAR{pSync} or \VAR{pWrk} array can be - reused in a subsequent reduction routine call only if none of the \ac{PE}s in - the \activeset{} are still processing a prior reduction routine call that used + reused in a subsequent reduction routine call only if none of the \acp{PE} in + the active set are still processing a prior reduction routine call that used the same \VAR{pSync} or \VAR{pWrk} arrays. In general, this can be assured only - by doing some type of synchronization. + by doing some type of synchronization.
} \begin{apiexamples} \apifexample - {This \Fortran{} reduction example statically initializes the \VAR{pSync} array + {This \Fortran reduction example statically initializes the \VAR{pSync} array and finds the logical \OPR{AND} of the integer variable \VAR{FOO} across all - even \ac{PE}s.} + even \acp{PE}.} {./example_code/shmem_and_example.f90} {} - + \apifexample - {This \Fortran{} example statically initializes the \VAR{pSync} array and finds - the \OPR{maximum} value of real variable \VAR{FOO} across all even \ac{PE}s.} + {This \Fortran example statically initializes the \VAR{pSync} array and finds + the \OPR{maximum} value of real variable \VAR{FOO} across all even \acp{PE}.} {./example_code/shmem_max_example.f90} {} \apifexample - { This \Fortran{} example statically initializes the \VAR{pSync} array and finds + { This \Fortran example statically initializes the \VAR{pSync} array and finds the \OPR{minimum} value of real variable \VAR{FOO} across all the even - \ac{PE}s.} + \acp{PE}.} {./example_code/shmem_min_example.f90} {} \apifexample - {This \Fortran{} example statically initializes the \VAR{pSync} array and finds - the \OPR{sum} of the real variable \VAR{FOO} across all even \ac{PE}s.} + {This \Fortran example statically initializes the \VAR{pSync} array and finds + the \OPR{sum} of the real variable \VAR{FOO} across all even \acp{PE}.} {./example_code/shmem_sum_example.f90} {} \apifexample - {This \Fortran{} example statically initializes the \VAR{pSync} array and finds - the \OPR{product} of the real variable \VAR{FOO} across all the even \ac{PE}s.} + {This \Fortran example statically initializes the \VAR{pSync} array and finds + the \OPR{product} of the real variable \VAR{FOO} across all the even \acp{PE}.} {./example_code/shmem_prod_example.f90} {} \apifexample - {This \Fortran{} example statically initializes the \VAR{pSync} array and finds + {This \Fortran example statically initializes the \VAR{pSync} array and finds the logical \OPR{OR} of the 
integer variable \VAR{FOO} across all even - \ac{PE}s.} + \acp{PE}.} {./example_code/shmem_or_example.f90} {} \apifexample - {This \Fortran{} example statically initializes the \VAR{pSync} array and + {This \Fortran example statically initializes the \VAR{pSync} array and computes the exclusive \OPR{XOR} of variable \VAR{FOO} across all even - \ac{PE}s.} + \acp{PE}.} {./example_code/shmem_xor_example.f90} - {} + {} \end{apiexamples} diff --git a/content/shmem_set.tex b/content/shmem_set.tex deleted file mode 100644 index b2d866122..000000000 --- a/content/shmem_set.tex +++ /dev/null @@ -1,52 +0,0 @@ -\apisummary{ - Atomically sets the value of a remote data object. -} - -\begin{apidefinition} - -\begin{C11synopsis} -void shmem_set(TYPE *dest, TYPE value, int pe); -\end{C11synopsis} -where \TYPE{} is one of the extended \ac{AMO} types specified by Table \ref{extamotypes}. - -\begin{Csynopsis} -void shmem__set(TYPE *dest, TYPE value, int pe); -\end{Csynopsis} -where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{extamotypes}. - -\begin{Fsynopsis} -INTEGER pe -INTEGER*4 SHMEM_INT4_SET, value_i4 -CALL SHMEM_INT4_SET(dest, value_i4, pe) -INTEGER*8 SHMEM_INT8_SET, value_i8 -CALL SHMEM_INT8_SET(dest, value_i8, pe) -REAL*4 SHMEM_REAL4_SET, value_r4 -CALL SHMEM_REAL4_SET(dest, value_r4, pe) -REAL*8 SHMEM_REAL8_SET, value_r8 -CALL SHMEM_REAL8_SET(dest, value_r8, pe) -\end{Fsynopsis} - -\begin{apiarguments} - -\apiargument{IN}{dest}{The remotely accessible data object to be set on - the remote \ac{PE}.} -\apiargument{IN}{value}{The value to be atomically written to the remote \ac{PE}.} -\apiargument{IN}{pe}{An integer that indicates the \ac{PE} number on which - \VAR{dest} is to be updated.} - -\end{apiarguments} - -\apidescription{ - \FUNC{shmem\_set} performs an atomic set operation. It writes the - \VAR{value} into \VAR{dest} on \VAR{pe} as an atomic operation. -} - -\apireturnvalues{ - None. 
-} - -\apinotes{ - None. -} - -\end{apidefinition} diff --git a/content/shmem_swap.tex b/content/shmem_swap.tex deleted file mode 100644 index 143f42ddd..000000000 --- a/content/shmem_swap.tex +++ /dev/null @@ -1,76 +0,0 @@ -\apisummary{ - Performs an atomic swap to a remote data object. -} - -\begin{apidefinition} - -\begin{C11synopsis} -TYPE shmem_swap(TYPE *dest, TYPE value, int pe); -\end{C11synopsis} -where \TYPE{} is one of the extended \ac{AMO} types specified by Table \ref{extamotypes}. - -\begin{Csynopsis} -TYPE shmem__swap(TYPE *dest, TYPE value, int pe); -\end{Csynopsis} -where \TYPE{} is one of the extended \ac{AMO} types and has a corresponding \TYPENAME{} specified by Table \ref{extamotypes}. - -\begin{Fsynopsis} -INTEGER SHMEM_SWAP, value, pe -ires = SHMEM_SWAP(dest, value, pe) -INTEGER*4 SHMEM_INT4_SWAP, value_i4, ires_i4 -ires_i4 = SHMEM_INT4_SWAP(dest, value_i4, pe) -INTEGER*8 SHMEM_INT8_SWAP, value_i8, ires_i8 -ires_i8 = SHMEM_INT8_SWAP(dest, value_i8, pe) -REAL*4 SHMEM_REAL4_SWAP, value_r4, res_r4 -res_r4 = SHMEM_REAL4_SWAP(dest, value_r4, pe) -REAL*8 SHMEM_REAL8_SWAP, value_r8, res_r8 -res_r8 = SHMEM_REAL8_SWAP(dest, value_r8, pe) -\end{Fsynopsis} - -\begin{apiarguments} - \apiargument{OUT}{dest}{The remotely accessible integer data object to be - updated on the remote \ac{PE}. If you are using \CorCpp, the type of - \dest{} should match that implied in the SYNOPSIS section.} - \apiargument{IN}{value}{The value to be atomically written to the remote - \ac{PE}. \VAR{value} is the same type as \dest.} - \apiargument{IN}{pe}{ An integer that indicates the \ac{PE} number on which - \dest{} is to be updated. If you are using \Fortran, it must be a default - integer value.} -\end{apiarguments} - -\apidescription{ - \FUNC{shmem\_swap} performs an atomic swap operation. It writes \VAR{value} - into \dest{} on \ac{PE} and returns the previous contents of \dest{} as an - atomic operation. 
-} - -\apidesctable{ - If you are using \Fortran, \VAR{dest} must be of the following type: -}{Routine}{Data type of \VAR{dest} and \VAR{source}} - -\apitablerow{SHMEM\_SWAP}{Integer of default kind} -\apitablerow{SHMEM\_INT4\_SWAP}{\CONST{4}-byte integer} -\apitablerow{SHMEM\_INT8\_SWAP}{\CONST{8}-byte integer} -\apitablerow{SHMEM\_REAL4\_SWAP}{\CONST{4}-byte real} -\apitablerow{SHMEM\_REAL8\_SWAP}{\CONST{8}-byte real} - -\apireturnvalues{ - The content that had been at the \dest{} address on the remote \ac{PE} - prior to the swap is returned. -} - -\apinotes{ - None. -} - -\begin{apiexamples} - -\apicexample - {The example below swap values between odd numbered \acp{PE} and their right - (modulo) neighbor and outputs the result of swap.} - {./example_code/shmem_swap_example.c} - {} - -\end{apiexamples} - -\end{apidefinition} diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex new file mode 100644 index 000000000..2c5707929 --- /dev/null +++ b/content/shmem_sync.tex @@ -0,0 +1,80 @@ +\apisummary{ + Performs all operations described in the \FUNC{shmem\_sync\_all} interface + but with respect to a subset of \acp{PE} defined by the active set. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_sync}@(int PE_start, int logPE_stride, int PE_size, long *pSync); +\end{Csynopsis} + +\begin{apiarguments} + +\apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of + \acp{PE}. \VAR{PE\_start} must be of type integer.} +\apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between + consecutive \ac{PE} numbers in the active set. \VAR{logPE\_stride} must be + of type integer.} +\apiargument{IN}{PE\_size}{The number of \acp{PE} in the active set. + \VAR{PE\_size} must be of type integer.} +\apiargument{IN}{pSync}{A symmetric work array. In \CorCpp, \VAR{pSync} must be + of type \CTYPE{long} and size \CONST{SHMEM\_BARRIER\_SYNC\_SIZE}. 
Every element of + this array must be initialized to \CONST{SHMEM\_SYNC\_VALUE} before any of the + \acp{PE} in the active set enter \FUNC{shmem\_sync} the first time.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_sync} is a collective synchronization routine over an + active set. Control returns from \FUNC{shmem\_sync} after all \acp{PE} in + the active set (specified by \VAR{PE\_start}, \VAR{logPE\_stride}, and + \VAR{PE\_size}) have called \FUNC{shmem\_sync}. + + As with all \openshmem collective routines, each of these routines assumes + that only \acp{PE} in the active set call the routine. If a \ac{PE} not in + the active set calls an \openshmem collective routine, the behavior is undefined. + + The values of arguments \VAR{PE\_start}, \VAR{logPE\_stride}, and + \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same + work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. + + In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only + ensures completion and visibility of previously issued memory stores and does not ensure + completion of remote memory updates issued via \openshmem routines. + + The same \VAR{pSync} array may be reused on consecutive calls to + \FUNC{shmem\_sync} if the same active set is used. +} + +\apireturnvalues{ + None. +} + +\apinotes{ + If the \VAR{pSync} array is initialized at run time, another method of + synchronization (e.g., \FUNC{shmem\_sync\_all}) must be used before + the initial use of that \VAR{pSync} array by \FUNC{shmem\_sync}. + + If the active set does not change, \FUNC{shmem\_sync} can be called + repeatedly with the same \VAR{pSync} array. No additional synchronization + beyond that implied by \FUNC{shmem\_sync} itself is necessary in this case. 
+ + The \FUNC{shmem\_sync} routine can be used to portably ensure that + memory access operations observe remote updates in the order enforced by the + initiator \acp{PE}, provided that the initiator PE ensures completion of remote + updates with a call to \FUNC{shmem\_quiet} prior to the call to the + \FUNC{shmem\_sync} routine. +} + +\begin{apiexamples} + +\apicexample + {The following \FUNC{shmem\_sync\_all} and \FUNC{shmem\_sync} example is + for \Cstd[11] programs:} + {./example_code/shmem_sync_example.c} + {} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_sync_all.tex b/content/shmem_sync_all.tex new file mode 100644 index 000000000..8d6c95244 --- /dev/null +++ b/content/shmem_sync_all.tex @@ -0,0 +1,43 @@ +\apisummary{ + Registers the arrival of a \ac{PE} at a barrier and suspends \ac{PE} + execution until all other \acp{PE} arrive at the barrier. +} + +\begin{apidefinition} + +\begin{Csynopsis} +void @\FuncDecl{shmem\_sync\_all}@(void); +\end{Csynopsis} + +\begin{apiarguments} + + \apiargument{None.}{}{} + +\end{apiarguments} + +\apidescription{ + The \FUNC{shmem\_sync\_all} routine registers the arrival of a \ac{PE} at a + barrier. Barriers are a fast mechanism for synchronizing all \acp{PE} at + once. This routine blocks the \ac{PE} until all \acp{PE} have called + \FUNC{shmem\_sync\_all}. In a multithreaded \openshmem + program, only the calling thread is blocked. + + In contrast with the \FUNC{shmem\_barrier\_all} routine, + \FUNC{shmem\_sync\_all} only ensures completion and visibility of previously issued memory + stores and does not ensure completion of remote memory updates issued via + \openshmem routines. +} + +\apireturnvalues{ + None. 
+} + +\apinotes{ + The \FUNC{shmem\_sync\_all} routine can be used to portably ensure that + memory access operations observe remote updates in the order enforced by the + initiator \acp{PE}, provided that the initiator PE ensures completion of remote + updates with a call to \FUNC{shmem\_quiet} prior to the call to the + \FUNC{shmem\_sync\_all} routine. +} + +\end{apidefinition} diff --git a/content/shmem_test.tex b/content/shmem_test.tex new file mode 100644 index 000000000..a7834af06 --- /dev/null +++ b/content/shmem_test.tex @@ -0,0 +1,54 @@ +\apisummary{ + Test whether a variable on the local \ac{PE} has changed. +} + +\begin{apidefinition} + +\begin{C11synopsis} +int @\FuncDecl{shmem\_test}@(TYPE *ivar, int cmp, TYPE cmp_value); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. + +\begin{Csynopsis} +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_test}@(TYPE *ivar, int cmp, TYPE cmp_value); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table \ref{p2psynctypes}. + +\begin{apiarguments} + + \apiargument{OUT}{ivar}{A pointer to a remotely accessible data object.} + \apiargument{IN}{cmp}{The comparison operator that compares \VAR{ivar} with + \VAR{cmp\_value}.} + \apiargument{IN}{cmp\_value}{The value against which the object pointed to + by \VAR{ivar} will be compared.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_test} tests the numeric comparison of the symmetric object + pointed to by \VAR{ivar} with the value \VAR{cmp\_value} according to the + comparison operator \VAR{cmp}. +} + +\apireturnvalues{ + \FUNC{shmem\_test} returns 1 if the comparison of the symmetric object + pointed to by \VAR{ivar} with the value \VAR{cmp\_value} according to the + comparison operator \VAR{cmp} evaluates to true; otherwise, it returns 0. +} + +\apinotes{ + None.
+} + +\begin{apiexamples} + \apicexample + {The following example demonstrates the use of \FUNC{shmem\_test} to + wait on an array of symmetric objects and return the index of an + element that satisfies the specified condition.} + {./example_code/shmem_test_example1.c} + {} +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shmem_wait.tex b/content/shmem_wait.tex deleted file mode 100644 index 7985e5a85..000000000 --- a/content/shmem_wait.tex +++ /dev/null @@ -1,133 +0,0 @@ -\apisummary{ - Wait for a variable on the local \ac{PE} to change. -} - -\begin{apidefinition} - -\begin{Csynopsis} -void shmem_int_wait(volatile int *ivar, int cmp_value); -void shmem_int_wait_until(volatile int *ivar, int cmp, int cmp_value); -void shmem_long_wait(volatile long *ivar, long cmp_value); -void shmem_long_wait_until(volatile long *ivar, int cmp, long cmp_value); -void shmem_longlong_wait(volatile long long *ivar, long long cmp_value); -void shmem_longlong_wait_until(volatile long long *ivar, int cmp, long long cmp_value); -void shmem_short_wait(volatile short *ivar, short cmp_value); -void shmem_short_wait_until(volatile short *ivar, int cmp, short cmp_value); -void shmem_wait(volatile long *ivar, long cmp_value); -void shmem_wait_until(volatile long *ivar, int cmp, long cmp_value); -\end{Csynopsis} - -\begin{Fsynopsis} -CALL SHMEM_INT4_WAIT(ivar, cmp_value) -CALL SHMEM_INT4_WAIT_UNTIL(ivar, cmp, cmp_value) -CALL SHMEM_INT8_WAIT(ivar, cmp_value) -CALL SHMEM_INT8_WAIT_UNTIL(ivar, cmp, cmp_value) -CALL SHMEM_WAIT(ivar, cmp_value) -CALL SHMEM_WAIT_UNTIL(ivar, cmp, cmp_value) -\end{Fsynopsis} - -\begin{apiarguments} - -\apiargument{OUT}{ivar}{A remotely accessible integer variable that is being updated - by another \ac{PE}. If you are using \CorCpp, the type of \VAR{ivar} should - match that implied in the SYNOPSIS section.} -\apiargument{IN}{cmp}{The compare operator that compares \VAR{ivar} with - \VAR{cmp\_value}. \VAR{cmp} must be of type integer. 
If you are using - \Fortran, it must be of default kind. If you are using \CorCpp, the type of - \VAR{cmp} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{cmp\_value}{\VAR{cmp\_value} must be of type integer. If you are - using \CorCpp, the type of \VAR{cmp\_value} should match that implied in the - SYNOPSIS section. If you are using \Fortran, cmp\_value must be an integer of - the same size and kind as \VAR{ivar}.} - -\end{apiarguments} - -\apidescription{ - \FUNC{shmem\_wait} and \FUNC{shmem\_wait\_until} wait for \VAR{ivar} to be - changed by a remote write or an atomic operation issued by a different \ac{PE}. - These routines can be used for point-to-point direct synchronization. A call - to \VAR{shmem\_wait} does not return until some other \ac{PE} writes a value, - not equal to \VAR{cmp\_value}, into \VAR{ivar} on the waiting \ac{PE}. A call - to \FUNC{shmem\_wait\_until} does not return until some other \ac{PE} changes - \VAR{ivar} to satisfy the condition implied by \VAR{cmp} and \VAR{cmp\_value}. - This mechanism is useful when a \ac{PE} needs to tell another \ac{PE} that it - has completed some action. The \FUNC{shmem\_wait} routines return when - \VAR{ivar} is no longer equal to \VAR{cmp\_value}. The - \FUNC{shmem\_wait\_until} routines return when the compare condition is true. - The compare condition is defined by the \VAR{ivar} argument compared with the - \VAR{cmp\_value} using the comparison operator, \VAR{cmp}. 
-} - - -\apidesctable{ - If you are using \Fortran, \VAR{ivar} must be a specific sized integer type - according to the routine being called, as follows: -}{Routine}{Data type} - -\apitablerow{shmem\_wait, shmem\_wait\_until}{default INTEGER} -\apitablerow{shmem\_int4\_wait, shmem\_int4\_wait\_until}{INTEGER*4} -\apitablerow{shmem\_int8\_wait, shmem\_int8\_wait\_until}{INTEGER*8} - -\apidesctable{ - The following \VAR{cmp} values are supported: -}{CMP Value}{Comparison} - -\CorCpp:\\ -\apitablerow{SHMEM\_CMP\_EQ }{ Equal} -\apitablerow{SHMEM\_CMP\_NE}{Not equal} -\apitablerow{SHMEM\_CMP\_GT}{Greater than} -\apitablerow{SHMEM\_CMP\_LE}{Less than or equal to} -\apitablerow{SHMEM\_CMP\_LT}{Less than} -\apitablerow{SHMEM\_CMP\_GE}{Greater than or equal to} -\\ -\Fortran:\\ -\apitablerow{SHMEM\_CMP\_EQ }{ Equal} -\apitablerow{SHMEM\_CMP\_NE}{Not equal} -\apitablerow{SHMEM\_CMP\_GT}{Greater than} -\apitablerow{SHMEM\_CMP\_LE}{Less than or equal to} -\apitablerow{SHMEM\_CMP\_LT}{Less than} -\apitablerow{SHMEM\_CMP\_GE}{Greater than or equal to} - -\apireturnvalues{ - None. -} - -\apinotes{ - None. -} - -\apiimpnotes{ - Implementations must ensure that \FUNC{shmem\_wait} and - \FUNC{shmem\_wait\_until} do not return before the update of the memory - indicated by \VAR{ivar} is fully complete. Partial updates to the memory - must not cause \FUNC{shmem\_wait} or \FUNC{shmem\_wait\_until} to return. 
-} - - -\begin{apiexamples} - -\apifexample -{ The following call returns when variable ivar is not equal to 100:} -{./example_code/shmem_wait1_example.f90} -{} - -\apifexample -{ The following call to \FUNC{SHMEM\_INT8\_WAIT\_UNTIL} is equivalent to the -call to \FUNC{SHMEM\_INT8\_WAIT} in example 1:} -{./example_code/shmem_wait2_example.f90} -{} - -\apicexample -{The following \CorCpp{} call waits until the sign bit in ivar is set by a -transfer from a remote PE:} -{./example_code/shmem_wait3_example.f90} -{} - -\apifexample -{The following \Fortran{} example is in the context of a subroutine:} -{./example_code/shmem_wait4_example.f90} -{} - -\end{apiexamples} - -\end{apidefinition} diff --git a/content/shmem_wait_until.tex b/content/shmem_wait_until.tex new file mode 100644 index 000000000..906b1a104 --- /dev/null +++ b/content/shmem_wait_until.tex @@ -0,0 +1,121 @@ +\apisummary{ + Wait for a variable on the local \ac{PE} to change. +} + +\begin{apidefinition} + +\begin{C11synopsis} +void @\FuncDecl{shmem\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); +\end{C11synopsis} +where \TYPE{} is one of the point-to-point synchronization types specified by +Table \ref{p2psynctypes}. + +\begin{Csynopsis} +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait\_until}@(TYPE *ivar, int cmp, TYPE cmp_value); +\end{Csynopsis} +where \TYPE{} is one of the point-to-point synchronization types and has a +corresponding \TYPENAME{} specified by Table~\ref{p2psynctypes}. + +\begin{DeprecateBlock} +\begin{CsynopsisCol} +void @\FuncDecl{shmem\_wait\_until}@(long *ivar, int cmp, long cmp_value); +void @\FuncDecl{shmem\_wait}@(long *ivar, long cmp_value); +void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_wait}@(TYPE *ivar, TYPE cmp_value); +\end{CsynopsisCol} +where \TYPE{} is one of \{\CTYPE{short}, \CTYPE{int}, \CTYPE{long}, +\CTYPE{long long}\} and has a corresponding \TYPENAME{} specified by +Table~\ref{p2psynctypes}. 
+\end{DeprecateBlock} + +\begin{Fsynopsis} +CALL @\FuncDecl{SHMEM\_INT4\_WAIT}@(ivar, cmp_value) +CALL @\FuncDecl{SHMEM\_INT4\_WAIT\_UNTIL}@(ivar, cmp, cmp_value) +CALL @\FuncDecl{SHMEM\_INT8\_WAIT}@(ivar, cmp_value) +CALL @\FuncDecl{SHMEM\_INT8\_WAIT\_UNTIL}@(ivar, cmp, cmp_value) +CALL @\FuncDecl{SHMEM\_WAIT}@(ivar, cmp_value) +CALL @\FuncDecl{SHMEM\_WAIT\_UNTIL}@(ivar, cmp, cmp_value) +\end{Fsynopsis} + +\begin{apiarguments} + +\apiargument{OUT}{ivar}{A remotely accessible integer variable. When using \CorCpp, + the type of \VAR{ivar} should match that implied in the SYNOPSIS section.} +\apiargument{IN}{cmp}{The compare operator that compares \VAR{ivar} with + \VAR{cmp\_value}. When using \Fortran, it must be of default kind. + When using \CorCpp, it must be of type \CTYPE{int}.} +\apiargument{IN}{cmp\_value}{\VAR{cmp\_value} must be of type integer. When + using \CorCpp, the type of \VAR{cmp\_value} should match that implied in the + SYNOPSIS section. When using \Fortran, cmp\_value must be an integer of + the same size and kind as \VAR{ivar}.} + +\end{apiarguments} + +\apidescription{ + \FUNC{shmem\_wait} and \FUNC{shmem\_wait\_until} wait for \VAR{ivar} to be + changed by a write or an atomic operation issued by a \ac{PE}. + These routines can be used for point-to-point direct synchronization. A call + to \FUNC{shmem\_wait} does not return until a \ac{PE} writes a value + not equal to \VAR{cmp\_value} into \VAR{ivar} on the waiting \ac{PE}. A call + to \FUNC{shmem\_wait\_until} does not return until a \ac{PE} changes + \VAR{ivar} to satisfy the condition implied by \VAR{cmp} and \VAR{cmp\_value}. + The \FUNC{shmem\_wait} routines return when \VAR{ivar} is no longer equal to \VAR{cmp\_value}. The + \FUNC{shmem\_wait\_until} routines return when the compare condition is true. + The compare condition is defined by the \VAR{ivar} argument compared with the + \VAR{cmp\_value} using the comparison operator \VAR{cmp}. 
+} + + +\apidesctable{ + When using \Fortran, \VAR{ivar} must be a specific sized integer type + according to the routine being called, as follows: +}{Routine}{Data type} + +\apitablerow{shmem\_wait, shmem\_wait\_until}{default INTEGER} +\apitablerow{shmem\_int4\_wait, shmem\_int4\_wait\_until}{INTEGER*4} +\apitablerow{shmem\_int8\_wait, shmem\_int8\_wait\_until}{INTEGER*8} + +\apireturnvalues{ + None. +} + +\apinotes{ + As of \openshmem[1.4], the \FUNC{shmem\_wait} routine is deprecated; + however, \FUNC{shmem\_wait} is equivalent to \FUNC{shmem\_wait\_until} + where \VAR{cmp} is \CONST{SHMEM\_CMP\_NE}. +} + +\apiimpnotes{ + Implementations must ensure that \FUNC{shmem\_wait} and + \FUNC{shmem\_wait\_until} do not return before the update of the memory + indicated by \VAR{ivar} is fully complete. Partial updates to the memory + must not cause \FUNC{shmem\_wait} or \FUNC{shmem\_wait\_until} to return. +} + + +\begin{apiexamples} + +\apifexample +{ The following call returns when variable \VAR{ivar} is not equal to 100:} +{./example_code/shmem_wait1_example.f90} +{} + +\apifexample +{ The following call to \FUNC{SHMEM\_INT8\_WAIT\_UNTIL} is equivalent to the +call to \FUNC{SHMEM\_INT8\_WAIT} in example 1:} +{./example_code/shmem_wait2_example.f90} +{} + +\apicexample +{The following \CorCpp{} call waits until the value in \VAR{ivar} is set to +be less than zero by a transfer from a remote PE:} +{./example_code/shmem_wait3_example.f90} +{} + +\apifexample +{The following \Fortran example is in the context of a subroutine:} +{./example_code/shmem_wait4_example.f90} +{} + +\end{apiexamples} + +\end{apidefinition} diff --git a/content/shpalloc.tex b/content/shpalloc.tex index beccfe774..fb63060b1 100644 --- a/content/shpalloc.tex +++ b/content/shpalloc.tex @@ -7,7 +7,7 @@ \begin{Fsynopsis} POINTER (addr, A(1)) INTEGER length, errcode, abort -CALL SHPALLOC(addr, length, errcode, abort) +CALL @\FuncDecl{SHPALLOC}@(addr, length, errcode, abort) \end{Fsynopsis}
\begin{apiarguments} @@ -22,11 +22,11 @@ \apidescription{ \FUNC{SHPALLOC} allocates a block of memory from the program's symmetric heap that is greater than or equal to the size requested. To maintain symmetric heap - consistency, all \ac{PE}s in an program must call \FUNC{SHPALLOC} with the same - value of length; if any \ac{PE}s are missing, the program will hang. + consistency, all \acp{PE} in a program must call \FUNC{SHPALLOC} with the same + value of length; if any \acp{PE} are missing, the program will hang. - By using the \Fortran{} \CONST{POINTER} mechanism in the following manner, you - can use array \VAR{A} to refer to the block allocated by \FUNC{SHPALLOC}: + By using the \Fortran \CONST{POINTER} mechanism in the following manner, + array \VAR{A} can be used to refer to the block allocated by \FUNC{SHPALLOC}: \CONST{POINTER} (\VAR{addr}, \VAR{A}()) } @@ -38,13 +38,13 @@ \apinotes{ The total size of the symmetric heap is determined at job startup. One may - adjust the size of the heap using the \CONST{SMA\_SYMMETRIC\_SIZE} environment + adjust the size of the heap using the \VAR{SHMEM\_SYMMETRIC\_SIZE} environment variable (if available). } \apiimpnotes{ The symmetric heap allocation routines always return a pointer to corresponding - symmetric objects across all PEs. The \openshmem{} specification does not + symmetric objects across all \acp{PE}. The \openshmem specification does not require that the virtual addresses are equal across all \acp{PE}.
Nevertheless, the implementation must avoid costly address translation operations in the communication path, including order $N$ (where $N$ is the number of \acp{PE}) diff --git a/content/shpclmove.tex b/content/shpclmove.tex index f3b5c9635..726a86061 100644 --- a/content/shpclmove.tex +++ b/content/shpclmove.tex @@ -8,7 +8,7 @@ \begin{Fsynopsis} POINTER (addr, A(1)) INTEGER length, status, abort -CALL SHPCLMOVE (addr, length, status, abort) +CALL @\FuncDecl{SHPCLMOVE}@(addr, length, status, abort) \end{Fsynopsis} \begin{apiarguments} @@ -28,9 +28,9 @@ is followed by a large enough free block or copies the contents of the existing block to a larger block and returns a status code indicating that the block was moved. This routine also can reduce the size of a block if the new length is - less than the old length. All \ac{PE}s in a program must call + less than the old length. All \acp{PE} in a program must call \FUNC{SHPCLMOVE} with the same value of \VAR{addr} to maintain symmetric heap - consistency; if any \ac{PE}s are missing, the program hangs. + consistency; if any \acp{PE} are missing, the program hangs. } \apireturnvalues{} diff --git a/content/shpdealloc.tex b/content/shpdeallc.tex similarity index 88% rename from content/shpdealloc.tex rename to content/shpdeallc.tex index 047d072d6..f14a557c3 100644 --- a/content/shpdealloc.tex +++ b/content/shpdeallc.tex @@ -7,7 +7,7 @@ \begin{Fsynopsis} POINTER (addr, A(1)) INTEGER errcode, abort -CALL SHPDEALLC(addr, errcode, abort) +CALL @\FuncDecl{SHPDEALLC}@(addr, errcode, abort) \end{Fsynopsis} \begin{apiarguments} @@ -21,8 +21,8 @@ \apidescription{ SHPDEALLC returns a block of memory (allocated using \FUNC{SHPALLOC}) to the list of available space in the symmetric heap. To maintain symmetric heap - consistency, all \ac{PE}s in a program must call \FUNC{SHPDEALLC} with the same - value of \VAR{addr}; if any \ac{PE}s are missing, the program hangs. 
+ consistency, all \acp{PE} in a program must call \FUNC{SHPDEALLC} with the same + value of \VAR{addr}; if any \acp{PE} are missing, the program hangs. } \apireturnvalues{} diff --git a/content/start_pes.tex b/content/start_pes.tex index 11679fe78..a88d55fc0 100644 --- a/content/start_pes.tex +++ b/content/start_pes.tex @@ -7,12 +7,14 @@ \begin{apidefinition} +\begin{DeprecateBlock} \begin{Csynopsis} -void start_pes(int npes); +void @\FuncDecl{start\_pes}@(int npes); \end{Csynopsis} +\end{DeprecateBlock} \begin{Fsynopsis} -CALL START_PES(npes) +CALL @\FuncDecl{START\_PES}@(npes) \end{Fsynopsis} \begin{apiarguments} @@ -21,8 +23,13 @@ \apidescription{ The \FUNC{start\_pes} routine initializes the \openshmem execution - environment. An \openshmem program must call \FUNC{start\_pes} before - calling any other \openshmem routine. + environment. An \openshmem program must call \FUNC{start\_pes}, + \FUNC{shmem\_init}, or \FUNC{shmem\_init\_thread} before calling any other \openshmem routine. Unlike + \FUNC{shmem\_init} and \FUNC{shmem\_init\_thread}, \FUNC{start\_pes} does not require a call to + \FUNC{shmem\_finalize}. Instead, the \openshmem library is implicitly + finalized when the program exits. Implicit finalization is collective and + includes a global synchronization to ensure that all pending communication + is completed before resources are released. } \apireturnvalues{ @@ -36,9 +43,10 @@ Calling \FUNC{start\_pes} more than once has no subsequent effect. - As of \openshmem Specification 1.2 the use of \FUNC{start\_pes} has + As of \openshmem[1.2] the use of \FUNC{start\_pes} has been deprecated. Although \openshmem libraries are required to support the - call, program users are encouraged to use \FUNC{shmem\_init} instead. + call, users are encouraged to use \FUNC{shmem\_init} or + \FUNC{shmem\_init\_thread} instead. 
} diff --git a/content/synchronization_model.tex b/content/synchronization_model.tex index b8bf865a0..8d3dd4b15 100644 --- a/content/synchronization_model.tex +++ b/content/synchronization_model.tex @@ -1,7 +1,7 @@ When using the \openshmem \ac{API}, synchronization, ordering, and completion of -communication become critical. The updates via \PUT{} routines, \acp{AMO} and -store routines on symmetric data cannot be guaranteed until some form of -synchronization or ordering is introduced by the program user. The table below +communication become critical. The updates via \PUT{} routines, \acp{AMO}, stores, and +nonblocking \PUT{} and \GET{} routines on symmetric data cannot be guaranteed until some form of +synchronization or ordering is introduced in the user's program. The table below gives the different synchronization and ordering choices, and the situations where they may be useful.\\ @@ -11,7 +11,7 @@ \hline \hline {Point-to-point synchronization}\\ -\FUNC{shmem\_wait}, \FUNC{shmem\_wait\_until} +\FUNC{shmem\_wait\_until} & \raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{figures/wait}} \end{tabular} @@ -19,9 +19,9 @@ \begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} {} & -{ Waits for a symmetric variable to be updated by a remote \ac{PE}. Should be +Waits for a symmetric variable to be updated by a remote \ac{PE}. Should be used when computation on the local \ac{PE} cannot proceed without the value that -the remote \ac{PE} is to update.} \tabularnewline +the remote \ac{PE} is to update. \tabularnewline \hline \end{tabular} @@ -36,7 +36,7 @@ \begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} {} & -All \PUT{} routines, \acp{AMO} and store routines on symmetric data issued to +All \PUT{}, \ac{AMO}, store, and nonblocking \PUT{} routines on symmetric data issued to same \ac{PE} are guaranteed to be delivered before Puts (to the same \ac{PE}) issued after the \FUNC{fence} call. 
\tabularnewline \hline @@ -56,17 +56,17 @@ \begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} {} & -{All \PUT{} routines, \acp{AMO} and store routines on symmetric data issued by a -local \ac{PE} to all remote \ac{PE}s are guaranteed to be completed and visible +{All \PUT{}, \ac{AMO}, store, and nonblocking \PUT{} and \GET{} routines on symmetric data issued by a +local \ac{PE} to all remote \acp{PE} are guaranteed to be completed and visible once quiet returns. This routine should be used when all remote writes issued by -a local \ac{PE} need to be visible to all other \ac{PE}s before the local +a local \ac{PE} need to be visible to all other \acp{PE} before the local \ac{PE} proceeds. } \tabularnewline \hline \end{tabular} \begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} -Collective synchronization over an \activeset \\ +Collective synchronization over an active set \\ \FUNC{shmem\_barrier} & \raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{figures/barrier}} @@ -75,13 +75,13 @@ \begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} {} & -{All local and remote memory operations issued by all \ac{PE}s within the -\activeset{} are guaranteed to be completed before any \ac{PE} in the -\activeset{} returns from the call. Additionally, no \ac{PE} my return from the -barrier until all \ac{PE}s in the \activeset{} have entered the same barrier +{All local and remote memory operations issued by all \acp{PE} within the +active set are guaranteed to be completed before any \ac{PE} in the +active set returns from the call. Additionally, no \ac{PE} shall return from the +barrier until all \acp{PE} in the active set have entered the same barrier call. 
This routine should be used when synchronization as well as completion of all stores and remote memory updates via \openshmem is required over a sub set -of the executing \ac{PE}s.} \tabularnewline +of the executing \acp{PE}.} \tabularnewline \hline \end{tabular} @@ -90,7 +90,7 @@ \textbf{\openshmem \ac{API}} & \centering \textbf{Working of \openshmem \ac{API}} \tabularnewline \hline \hline -{Collective synchronization over all \ac{PE}s} \\ +{Collective synchronization over all \acp{PE}} \\ \FUNC{shmem\_barrier\_all} & \raisebox{-\totalheight}{\includegraphics[width=0.7\textwidth]{figures/barrierall}} @@ -99,12 +99,12 @@ \begin{tabular}{p{0.2\textwidth} | p{0.7\textwidth}} {} & -{All local and remote memory operations issued by all \ac{PE}s are guaranteed to +{All local and remote memory operations issued by all \acp{PE} are guaranteed to be completed before any \ac{PE} returns from the call. Additionally no \ac{PE} -shall return from the barrier until all \ac{PE}s have entered the same +shall return from the barrier until all \acp{PE} have entered the same \FUNC{shmem\_barrier\_all} call. This routine should be used when synchronization as well as completion of all stores and remote memory updates -via \openshmem is required over all \ac{PE}s. } \tabularnewline +via \openshmem is required over all \acp{PE}. } \tabularnewline \hline \end{tabular} \clearpage diff --git a/content/the_openshmem_effort.tex b/content/the_openshmem_effort.tex index 2194ff6f4..2eaa0c619 100644 --- a/content/the_openshmem_effort.tex +++ b/content/the_openshmem_effort.tex @@ -3,8 +3,8 @@ facilitate uniform predictable results of \openshmem programs by explicitly stating the behavior and semantics of the \openshmem library calls. Through the different versions, \openshmem will continue to address the requirements of the -\ac{PGAS} community. As of this specification, existing vendors are moving -towards \openshmem compliant implementations and new vendors are developing +\ac{PGAS} community. 
As of this specification, many existing vendors support +\openshmem-compliant implementations and new vendors are developing \openshmem library implementations to help the users write portable \openshmem code. This ensures that programs can run on multiple platforms without having to deal with subtle vendor-specific implementation differences. For more details on @@ -13,9 +13,10 @@ The \openshmem\footnote{The \openshmem specification is owned by Open Source Software Solutions Inc., a non-profit organization, under an agreement with -SGI.} effort is driven by the \ac{ESSC} at \ac{ORNL} and the University of -Houston with significant input from the \openshmem community. Besides the -specification, the effort also includes providing a reference \openshmem +\ac{HPE}.} effort is driven by the \ac{DoD} with continuous input from the \openshmem community. +To see all of the contributors and participants for the \openshmem API, +please see: \url{http://www.openshmem.org/site/Contributors}. In addition to the +specification, the effort includes a reference \openshmem implementation, validation and verification suites, tools, a mailing list and website infrastructure to support specification activities. For more information please refer to: \url{http://www.openshmem.org/}. diff --git a/content/threads_intro.tex b/content/threads_intro.tex new file mode 100644 index 000000000..71dade746 --- /dev/null +++ b/content/threads_intro.tex @@ -0,0 +1,64 @@ +This section specifies the interaction between the \openshmem interfaces and +user threads. It also describes the routines that can be used for initializing and +querying the thread environment. There are four levels of threading defined by +the \openshmem specification. + +\begin{description} +\item[\LibConstRef{SHMEM\_THREAD\_SINGLE}] \hfill \\ +The \openshmem program must not be multithreaded. + +\item[\LibConstRef{SHMEM\_THREAD\_FUNNELED}] \hfill \\ +The \openshmem program may be multithreaded. 
However, the program must ensure +that only the main thread invokes the \openshmem interfaces. The main thread +is the thread that invokes either \FUNC{shmem\_init} or \FUNC{shmem\_init\_thread}. + +\item[\LibConstRef{SHMEM\_THREAD\_SERIALIZED}] \hfill \\ +The \openshmem program may be multithreaded. However, the program must ensure +that the \openshmem interfaces are not invoked concurrently by multiple threads. + +\item[\LibConstRef{SHMEM\_THREAD\_MULTIPLE}] \hfill \\ +The \openshmem program may be multithreaded and any thread may invoke the \openshmem +interfaces. +\end{description} + +\noindent The following semantics apply to the usage of these models: + +\begin{enumerate} +\item +In the \CONST{SHMEM\_THREAD\_FUNNELED}, \CONST{SHMEM\_THREAD\_SERIALIZED}, and +\CONST{SHMEM\_THREAD\_MULTIPLE} thread levels, the \FUNC{shmem\_init} and +\FUNC{shmem\_finalize} calls must be invoked by the same thread. + +\item +Any \openshmem operation initiated by a thread is considered an action of the +\ac{PE} as a whole. The symmetric heap and symmetric variables scope are not +impacted by multiple threads invoking the \openshmem interfaces. +Each \ac{PE} has a single symmetric data segment and symmetric heap that is shared by +all threads within that \ac{PE}. For example, a thread invoking a memory allocation +routine such as \FUNC{shmem\_malloc} allocates memory that is accessible by +all threads of the \ac{PE}. The requirement that the same symmetric heap operations +must be executed by all \acp{PE} in the same order also applies in a threaded +environment. Similarly, the completion of collective operations is not impacted +by multiple threads. For example, \FUNC{shmem\_barrier\_all} is completed when +all \acp{PE} enter and exit the \FUNC{shmem\_barrier\_all} call, even though +only one thread in the \ac{PE} is participating in the collective call. 
+ +\item Blocking \openshmem calls will only block the calling thread, allowing +other threads, if available, to continue executing. The calling thread will +be blocked until the event on which it is waiting occurs. Once the blocking call is +completed, the thread is ready to continue execution. A blocked thread +will not prevent progress of other threads on the same \ac{PE} and will not +prevent them from executing other \openshmem calls when the thread level permits. +In addition, a blocked thread will not prevent the progress of \openshmem calls +performed on other \acp{PE}. + +\item In the \CONST{SHMEM\_THREAD\_MULTIPLE} thread level, all \openshmem calls are thread-safe. +Any two concurrently running threads may make \openshmem calls and the outcome +will be as if the calls executed in some order, even if their execution is interleaved. + +\item In the \CONST{SHMEM\_THREAD\_SERIALIZED} and \CONST{SHMEM\_THREAD\_MULTIPLE} thread levels, +if multiple threads call collective routines, including the symmetric heap +management routines, it is the programmer's responsibility to ensure the +correct ordering of collective calls. 
+ +\end{enumerate} diff --git a/example_code/Makefile b/example_code/Makefile new file mode 100644 index 000000000..c37cfc99f --- /dev/null +++ b/example_code/Makefile @@ -0,0 +1,33 @@ +CC = oshcc +CFLAGS ?= -Wall -Wextra + +FC = oshfort +FFLAGS ?= -Wall -Wextra + +RUNCMD = oshrun +RUNOPT = -np 4 + +C_TESTS = $(wildcard *.c) +C_BINS = $(C_TESTS:.c=.cx) + +F_TESTS = $(wildcard *.f90) +F_BINS = $(F_TESTS:.f90=.fx) + +.PHONY: all run clean + +all: $(C_BINS) $(F_BINS) + +%.cx: %.c + $(CC) $(CFLAGS) -o $@ $+ + +%.fx: %.f90 + $(FC) $(FFLAGS) -o $@ $+ + +run: $(C_BINS) + @for bin in $+; do \ + echo --$$bin------------------------------; \ + $(RUNCMD) $(RUNOPT) ./$$bin || exit $$?; \ + done + +clean: + rm -f $(C_BINS) $(F_BINS) diff --git a/example_code/hello-openshmem.c b/example_code/hello-openshmem.c index 8102cee6d..8432b20dd 100644 --- a/example_code/hello-openshmem.c +++ b/example_code/hello-openshmem.c @@ -1,14 +1,12 @@ #include -#include /* The shmem header file */ +#include /* The OpenSHMEM header file */ -int -main (int argc, char *argv[]) +int main (void) { - int nprocs, me; - - shmem_init (); - nprocs = shmem_n_pes (); - me = shmem_my_pe (); - printf ("Hello from %d of %d\n", me, nprocs); - return 0; + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + printf("Hello from %d of %d\n", me, npes); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_add_example.c b/example_code/shmem_add_example.c deleted file mode 100644 index a94f0c5b6..000000000 --- a/example_code/shmem_add_example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -int main(void) -{ - int me, old; - static int dst; - - shmem_init(); - me = shmem_my_pe(); - - old = -1; - dst = 22; - shmem_barrier_all(); - - if (me == 1){ - old = shmem_add(&dst, 44, 0); - } - shmem_barrier_all(); - printf("%d: old = %d, dst = %d\n", me, old, dst); - return 0; -} diff --git a/example_code/shmem_alltoall_example.c b/example_code/shmem_alltoall_example.c index 
b29222626..13d11b75e 100644 --- a/example_code/shmem_alltoall_example.c +++ b/example_code/shmem_alltoall_example.c @@ -1,50 +1,46 @@ -#include #include - -long pSync[SHMEM_ALLTOALL_SYNC_SIZE]; +#include +#include int main(void) { - int64_t *source, *dest; - int i, count, pe; - - shmem_init(); - - count = 2; - dest = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); - source = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); - - /* assign source values */ - for (pe=0; pe #include - -long pSync[SHMEM_ALLTOALLS_SYNC_SIZE]; +#include +#include int main(void) { - int64_t *source, *dest; - int i, count, pe; - - shmem_init(); - - count = 2; - dest = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); - source = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); - - /* assign source values */ - for (pe=0; pe #include -int dst; - int main(void) { - int me; - + static int dst = 22; shmem_init(); - me = shmem_my_pe(); - - dst = 74; + int me = shmem_my_pe(); + if (me == 1) + shmem_atomic_add(&dst, 44, 0); shmem_barrier_all(); - - if (me == 0) - shmem_int_inc(&dst, 1); - shmem_barrier_all(); - printf("%d: dst = %d\n", me, dst); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_atomic_compare_swap_example.c b/example_code/shmem_atomic_compare_swap_example.c new file mode 100644 index 000000000..a3b7743fb --- /dev/null +++ b/example_code/shmem_atomic_compare_swap_example.c @@ -0,0 +1,13 @@ +#include +#include + +int main(void) +{ + static int race_winner = -1; + shmem_init(); + int me = shmem_my_pe(); + int oldval = shmem_atomic_compare_swap(&race_winner, -1, me, 0); + if (oldval == -1) printf("PE %d was first\n", me); + shmem_finalize(); + return 0; +} diff --git a/_deprecated_sources/EXAMPLES/shmem_finc_example.c b/example_code/shmem_atomic_fetch_add_example.c similarity index 50% rename from _deprecated_sources/EXAMPLES/shmem_finc_example.c rename to example_code/shmem_atomic_fetch_add_example.c index 
86d265b84..21a32cc5b 100644 --- a/_deprecated_sources/EXAMPLES/shmem_finc_example.c +++ b/example_code/shmem_atomic_fetch_add_example.c @@ -1,24 +1,16 @@ #include #include -int dst; - int main(void) { - int me; - int old; - + int old = -1; + static int dst = 22; shmem_init(); - me = shmem_my_pe(); - - old = -1; - dst = 22; - shmem_barrier_all(); - - if (me == 0) - old = shmem_int_finc(&dst, 1); - + int me = shmem_my_pe(); + if (me == 1) + old = shmem_atomic_fetch_add(&dst, 44, 0); shmem_barrier_all(); printf("%d: old = %d, dst = %d\n", me, old, dst); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_finc_example.c b/example_code/shmem_atomic_fetch_inc_example.c similarity index 55% rename from example_code/shmem_finc_example.c rename to example_code/shmem_atomic_fetch_inc_example.c index b1cd94552..f064136a4 100644 --- a/example_code/shmem_finc_example.c +++ b/example_code/shmem_atomic_fetch_inc_example.c @@ -1,24 +1,16 @@ #include #include -int dst; - int main(void) { - int me; - int old; - + int old = -1; + static int dst = 22; shmem_init(); - me = shmem_my_pe(); - - old = -1; - dst = 22; - shmem_barrier_all(); - + int me = shmem_my_pe(); if (me == 0) - old = shmem_finc(&dst, 1); - + old = shmem_atomic_fetch_inc(&dst, 1); shmem_barrier_all(); printf("%d: old = %d, dst = %d\n", me, old, dst); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_inc_example.c b/example_code/shmem_atomic_inc_example.c similarity index 59% rename from example_code/shmem_inc_example.c rename to example_code/shmem_atomic_inc_example.c index f67243eaa..a51f59d0e 100644 --- a/example_code/shmem_inc_example.c +++ b/example_code/shmem_atomic_inc_example.c @@ -1,22 +1,15 @@ #include #include -int dst; - int main(void) { - int me; - + static int dst = 74; shmem_init(); - me = shmem_my_pe(); - - dst = 74; - shmem_barrier_all(); - + int me = shmem_my_pe(); if (me == 0) - shmem_inc(&dst, 1); + shmem_atomic_inc(&dst, 1); shmem_barrier_all(); - printf("%d: dst = %d\n", 
me, dst); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_atomic_swap_example.c b/example_code/shmem_atomic_swap_example.c new file mode 100644 index 000000000..ac817271d --- /dev/null +++ b/example_code/shmem_atomic_swap_example.c @@ -0,0 +1,19 @@ +#include +#include + +int main(void) +{ + static long dest; + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + dest = me; + shmem_barrier_all(); + long new_val = me; + if (me & 1) { + long swapped_val = shmem_atomic_swap(&dest, new_val, (me + 1) % npes); + printf("%d: dest = %ld, swapped = %ld\n", me, dest, swapped_val); + } + shmem_finalize(); + return 0; +} diff --git a/example_code/shmem_barrier_example.c b/example_code/shmem_barrier_example.c index fb5d0eeb3..5fa27bec4 100644 --- a/example_code/shmem_barrier_example.c +++ b/example_code/shmem_barrier_example.c @@ -1,28 +1,24 @@ #include #include -long pSync[SHMEM_BARRIER_SYNC_SIZE]; -int x = 10101; - int main(void) { - int i, me, npes; - - for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i += 1){ + static int x = 10101; + static long pSync[SHMEM_BARRIER_SYNC_SIZE]; + for (int i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) pSync[i] = SHMEM_SYNC_VALUE; - } shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); - if(me % 2 == 0){ - x = 1000 + me; - /*put to next even PE in a circular fashion*/ - shmem_int_p(&x, 4, (me+2)%npes); - /*synchronize all even pes*/ - shmem_barrier(0, 1, (npes/2 + npes%2), pSync); + if (me % 2 == 0) { + /* put to next even PE in a circular fashion */ + shmem_p(&x, 4, (me + 2) % npes); + /* synchronize all even pes */ + shmem_barrier(0, 1, (npes / 2 + npes % 2), pSync); } printf("%d: x = %d\n", me, x); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_barrierall_example.c b/example_code/shmem_barrierall_example.c index 5e4f430a9..bf23be573 100644 --- a/example_code/shmem_barrierall_example.c +++ b/example_code/shmem_barrierall_example.c @@ -1,21 
+1,20 @@ #include #include -int x=1010; - int main(void) { - int me, npes; + static int x = 1010; shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); - /*put to next PE in a circular fashion*/ - shmem_int_p(&x, 4, (me+1)%npes); - /*synchronize all PEs*/ - shmem_barrier_all(); + /* put to next PE in a circular fashion */ + shmem_p(&x, 4, (me + 1) % npes); + /* synchronize all PEs */ + shmem_barrier_all(); printf("%d: x = %d\n", me, x); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_broadcast_example.c b/example_code/shmem_broadcast_example.c index 7fe7f7335..a829448ea 100644 --- a/example_code/shmem_broadcast_example.c +++ b/example_code/shmem_broadcast_example.c @@ -2,30 +2,23 @@ #include #include -#define NUM_ELEMS 4 -long pSync[SHMEM_BCAST_SYNC_SIZE]; -long source[NUM_ELEMS], dest[NUM_ELEMS]; - int main(void) { - int i, me, npes; + static long pSync[SHMEM_BCAST_SYNC_SIZE]; + for (int i = 0; i < SHMEM_BCAST_SYNC_SIZE; i++) + pSync[i] = SHMEM_SYNC_VALUE; + static long source[4], dest[4]; shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); if (me == 0) - for (i = 0; i < NUM_ELEMS; i++) + for (int i = 0; i < 4; i++) source[i] = i; - for (i=0; i < SHMEM_BCAST_SYNC_SIZE; i++) { - pSync[i] = SHMEM_SYNC_VALUE; - } - shmem_barrier_all(); /* Wait for all PEs to initialize pSync */ - shmem_broadcast64(dest, source, NUM_ELEMS, 0, 0, 0, npes, pSync); - printf("%d: %ld", me, dest[0]); - for (i = 1; i < NUM_ELEMS; i++) - printf(", %ld", dest[i]); - printf("\n"); + shmem_broadcast64(dest, source, 4, 0, 0, 0, npes, pSync); + printf("%d: %ld, %ld, %ld, %ld\n", me, dest[0], dest[1], dest[2], dest[3]); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_collect_example.c b/example_code/shmem_collect_example.c index 219f65daa..b73733368 100644 --- a/example_code/shmem_collect_example.c +++ b/example_code/shmem_collect_example.c @@ 
-2,30 +2,37 @@ #include #include -long pSync[SHMEM_COLLECT_SYNC_SIZE]; -int source[2]; - int main(void) { - int i, me, npes; - int *dest; + static long lock = 0; + static long pSync[SHMEM_COLLECT_SYNC_SIZE]; + for (int i = 0; i < SHMEM_COLLECT_SYNC_SIZE; i++) + pSync[i] = SHMEM_SYNC_VALUE; shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + int my_nelem = me + 1; /* linearly increasing number of elements with PE */ + int total_nelem = (npes * (npes + 1)) / 2; - source[0] = me * 2; - source[1] = me * 2 + 1; - dest = (int *)shmem_malloc(sizeof(int) * npes * 2); - for (i=0; i < SHMEM_COLLECT_SYNC_SIZE; i++) { - pSync[i] = SHMEM_SYNC_VALUE; - } - shmem_barrier_all(); /* Wait for all PEs to initialize pSync */ + int* source = (int*) shmem_malloc(npes*sizeof(int)); /* symmetric alloc */ + int* dest = (int*) shmem_malloc(total_nelem*sizeof(int)); + + for (int i = 0; i < my_nelem; i++) + source[i] = (me * (me + 1)) / 2 + i; + for (int i = 0; i < total_nelem; i++) + dest[i] = -9999; + + shmem_barrier_all(); /* Wait for all PEs to update source/dest */ + + shmem_collect32(dest, source, my_nelem, 0, 0, npes, pSync); - shmem_collect32(dest, source, 2, 0, 0, npes, pSync); + shmem_set_lock(&lock); /* Lock prevents interleaving printfs */ printf("%d: %d", me, dest[0]); - for (i = 1; i < npes * 2; i++) + for (int i = 1; i < total_nelem; i++) printf(", %d", dest[i]); printf("\n"); + shmem_clear_lock(&lock); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_cswap_example.c b/example_code/shmem_cswap_example.c deleted file mode 100644 index 54012fc46..000000000 --- a/example_code/shmem_cswap_example.c +++ /dev/null @@ -1,12 +0,0 @@ -#include -#include - -int main(void) -{ - static int race_winner = -1; - int oldval; - shmem_init(); - oldval = shmem_cswap(&race_winner, -1, shmem_my_pe(), 0); - if(oldval == -1) printf("pe %d was first\n",shmem_my_pe()); - return 1; -} diff --git a/example_code/shmem_ctx.c 
b/example_code/shmem_ctx.c new file mode 100644 index 000000000..61837f8f5 --- /dev/null +++ b/example_code/shmem_ctx.c @@ -0,0 +1,56 @@ +#include +#include + +long pwrk[SHMEM_REDUCE_MIN_WRKDATA_SIZE]; +long psync[SHMEM_REDUCE_SYNC_SIZE]; + +long task_cntr = 0; /* Next task counter */ +long tasks_done = 0; /* Tasks done by this PE */ +long total_done = 0; /* Total tasks done by all PEs */ + +int main(void) { + int tl, i; + long ntasks = 1024; /* Total tasks per PE */ + + for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) + psync[i] = SHMEM_SYNC_VALUE; + + shmem_init_thread(SHMEM_THREAD_MULTIPLE, &tl); + if (tl != SHMEM_THREAD_MULTIPLE) shmem_global_exit(1); + + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + +#pragma omp parallel reduction (+:tasks_done) + { + shmem_ctx_t ctx; + int task_pe = me, pes_done = 0; + int ret = shmem_ctx_create(SHMEM_CTX_PRIVATE, &ctx); + + if (ret != 0) { + printf("%d: Error creating context (%d)\n", me, ret); + shmem_global_exit(2); + } + + /* Process tasks on all PEs, starting with the local PE. After + * all tasks on a PE are completed, help the next PE. 
*/ + while (pes_done < npes) { + long task = shmem_atomic_fetch_inc(ctx, &task_cntr, task_pe); + while (task < ntasks) { + /* Perform task (task_pe, task) */ + tasks_done++; + task = shmem_atomic_fetch_inc(ctx, &task_cntr, task_pe); + } + pes_done++; + task_pe = (task_pe + 1) % npes; + } + + shmem_ctx_destroy(ctx); + } + + shmem_long_sum_to_all(&total_done, &tasks_done, 1, 0, 0, npes, pwrk, psync); + + int result = (total_done != ntasks * npes); + shmem_finalize(); + return result; +} diff --git a/example_code/shmem_ctx_pipelined_reduce.c b/example_code/shmem_ctx_pipelined_reduce.c new file mode 100644 index 000000000..c21d7ff89 --- /dev/null +++ b/example_code/shmem_ctx_pipelined_reduce.c @@ -0,0 +1,56 @@ +#include +#include +#include + +#define LEN 8192 /* Full buffer length */ +#define PLEN 512 /* Length of each pipeline stage */ + +int in_buf[LEN], out_buf[LEN]; + +int main(void) { + int i, j, *pbuf[2]; + shmem_ctx_t ctx[2]; + + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + + pbuf[0] = shmem_malloc(PLEN * npes * sizeof(int)); + pbuf[1] = shmem_malloc(PLEN * npes * sizeof(int)); + + int ret_0 = shmem_ctx_create(0, &ctx[0]); + int ret_1 = shmem_ctx_create(0, &ctx[1]); + if (ret_0 || ret_1) shmem_global_exit(1); + + for (i = 0; i < LEN; i++) { + in_buf[i] = me; out_buf[i] = 0; + } + + int p_idx = 0, p = 0; /* Index of ctx and pbuf (p_idx) for current pipeline stage (p) */ + for (i = 1; i <= npes; i++) + shmem_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN*me], &in_buf[PLEN*p], + PLEN, (me+i) % npes); + + /* Issue communication for pipeline stage p, then accumulate results for stage p-1 */ + for (p = 1; p < LEN/PLEN; p++) { + p_idx ^= 1; + for (i = 1; i <= npes; i++) + shmem_put_nbi(ctx[p_idx], &pbuf[p_idx][PLEN*me], &in_buf[PLEN*p], + PLEN, (me+i) % npes); + + shmem_ctx_quiet(ctx[p_idx^1]); + shmem_sync_all(); + for (i = 0; i < npes; i++) + for (j = 0; j < PLEN; j++) + out_buf[PLEN*(p-1)+j] += pbuf[p_idx^1][PLEN*i+j]; + } + + 
shmem_ctx_quiet(ctx[p_idx]); + shmem_sync_all(); + for (i = 0; i < npes; i++) + for (j = 0; j < PLEN; j++) + out_buf[PLEN*(p-1)+j] += pbuf[p_idx][PLEN*i+j]; + + shmem_finalize(); + return 0; +} diff --git a/example_code/shmem_fadd_example.c b/example_code/shmem_fadd_example.c deleted file mode 100644 index f218c27ce..000000000 --- a/example_code/shmem_fadd_example.c +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include - -int main(void) -{ - int me, old; - static int dst; - - shmem_init(); - me = shmem_my_pe(); - - old = -1; - dst = 22; - shmem_barrier_all(); - - if (me == 1){ - old = shmem_fadd(&dst, 44, 0); - } - shmem_barrier_all(); - printf("%d: old = %d, dst = %d\n", me, old, dst); - return 0; -} diff --git a/example_code/shmem_fence_example.c b/example_code/shmem_fence_example.c index b5ddbaa88..3f72c8fb1 100644 --- a/example_code/shmem_fence_example.c +++ b/example_code/shmem_fence_example.c @@ -1,22 +1,23 @@ #include #include -long dest[10] = {0}; -int targ = 0; - int main(void) { - long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; - int src = 99; - shmem_init(); - if (shmem_my_pe() == 0) { - shmem_long_put(dest, source, 10, 1); /*put1*/ - shmem_long_put(dest, source, 10, 2); /*put2*/ - shmem_fence(); - shmem_int_put(&targ, &src, 1, 1); /*put3*/ - shmem_int_put(&targ, &src, 1, 2); /*put4*/ - } - shmem_barrier_all(); /* sync sender and receiver */ - printf("dest[0] on PE %d is %ld\n", shmem_my_pe(), dest[0]); - return 1; + int src = 99; + long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + static long dest[10]; + static int targ; + shmem_init(); + int me = shmem_my_pe(); + if (me == 0) { + shmem_put(dest, source, 10, 1); /* put1 */ + shmem_put(dest, source, 10, 2); /* put2 */ + shmem_fence(); + shmem_put(&targ, &src, 1, 1); /* put3 */ + shmem_put(&targ, &src, 1, 2); /* put4 */ + } + shmem_barrier_all(); /* sync sender and receiver */ + printf("dest[0] on PE %d is %ld\n", me, dest[0]); + shmem_finalize(); + return 0; } diff --git 
a/example_code/shmem_finalize_example.c b/example_code/shmem_finalize_example.c index 0412810de..26d7860ea 100644 --- a/example_code/shmem_finalize_example.c +++ b/example_code/shmem_finalize_example.c @@ -1,18 +1,17 @@ #include #include -long x = 10101; - -int main(void) { - int me, npes; +int main(void) +{ + static long x = 10101; long y = -1; shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); - me = shmem_my_pe(); - npes = shmem_n_pes(); if (me == 0) - y = shmem_long_g(&x, npes-1); + y = shmem_g(&x, npes-1); printf("%d: y = %ld\n", me, y); diff --git a/example_code/shmem_g_example.c b/example_code/shmem_g_example.c index fd31d98ff..f1f49ee38 100644 --- a/example_code/shmem_g_example.c +++ b/example_code/shmem_g_example.c @@ -1,21 +1,16 @@ #include #include -long x = 10101; - int main(void) { - int me, npes; long y = -1; - + static long x = 10101; shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); - + int me = shmem_my_pe(); + int npes = shmem_n_pes(); if (me == 0) y = shmem_g(&x, npes-1); - printf("%d: y = %ld\n", me, y); - + shmem_finalize(); return 0; } diff --git a/example_code/shmem_global_exit_example.c b/example_code/shmem_global_exit_example.c index 71c5f9fef..a3c21e379 100644 --- a/example_code/shmem_global_exit_example.c +++ b/example_code/shmem_global_exit_example.c @@ -2,27 +2,18 @@ #include #include -int -main(void) +int main(void) { - int me, npes; - - shmem_init(); - - me = shmem_my_pe(); - npes = shmem_n_pes(); - - if (me == 0) { - FILE *fp = fopen("input.txt", "r"); - - if (fp == NULL) { /* Input file required by program is not available */ - shmem_global_exit(EXIT_FAILURE); - } - - /* do something with the file */ - - fclose(fp); + shmem_init(); + int me = shmem_my_pe(); + if (me == 0) { + FILE *fp = fopen("input.txt", "r"); + if (fp == NULL) { /* Input file required by program is not available */ + shmem_global_exit(EXIT_FAILURE); + } + /* do something with the file */ + fclose(fp); } - + shmem_finalize(); return 0; } 
diff --git a/example_code/shmem_init_example.c b/example_code/shmem_init_example.c new file mode 100644 index 000000000..e3192e9ff --- /dev/null +++ b/example_code/shmem_init_example.c @@ -0,0 +1,23 @@ +#include +#include + +int main(void) { + static int targ = 0; + + shmem_init(); + int me = shmem_my_pe(); + int receiver = 1 % shmem_n_pes(); + + if (me == 0) { + int src = 33; + shmem_put(&targ, &src, 1, receiver); + } + + shmem_barrier_all(); /* Synchronizes sender and receiver */ + + if (me == receiver) + printf("PE %d targ=%d (expect 33)\n", me, targ); + + shmem_finalize(); + return 0; +} diff --git a/example_code/shmem_init_example.f90 b/example_code/shmem_init_example.f90 deleted file mode 100644 index 0649d519f..000000000 --- a/example_code/shmem_init_example.f90 +++ /dev/null @@ -1,22 +0,0 @@ -PROGRAM PUT -INCLUDE "shmem.fh" - -INTEGER TARG, SRC, RECEIVER, BAR -COMMON /T/ TARG -PARAMETER (RECEIVER=1) -CALL SHMEM_INIT() - -IF (SHMEM_MY_PE() .EQ. 0) THEN - SRC = 33 - CALL SHMEM_INTEGER_PUT(TARG, SRC, 1, RECEIVER) -ENDIF - -CALL SHMEM_BARRIER_ALL ! SYNCHRONIZES SENDER AND RECEIVER - -IF (SHMEM_MY_PE() .EQ. 
RECEIVER) THEN - PRINT*,'PE ', SHMEM_MY_PE(),' TARG=',TARG,' (expect 33)' -ENDIF - -CALL SHMEM_FINALIZE() - -END diff --git a/example_code/shmem_iput_example.c b/example_code/shmem_iput_example.c index d5ce61fa1..101533563 100644 --- a/example_code/shmem_iput_example.c +++ b/example_code/shmem_iput_example.c @@ -3,20 +3,17 @@ int main(void) { - short source[10] = { 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10 }; + short source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; static short dest[10]; shmem_init(); - if (shmem_my_pe() == 0) { - /* put 5 words into dest on PE 1 */ + int me = shmem_my_pe(); + if (me == 0) /* put 5 elements into dest on PE 1 */ shmem_iput(dest, source, 1, 2, 5, 1); + shmem_barrier_all(); /* sync sender and receiver */ + if (me == 1) { + printf("dest on PE %d is %hd %hd %hd %hd %hd\n", me, + dest[0], dest[1], dest[2], dest[3], dest[4]); } - shmem_barrier_all(); /* sync sender and receiver */ - if (shmem_my_pe() == 1) { - printf("dest on PE %d is %d %d %d %d %d\n", shmem_my_pe(), - (int)dest[0], (int)dest[1], (int)dest[2], - (int)dest[3], (int)dest[4] ); - } - shmem_barrier_all(); /* sync before exiting */ + shmem_finalize(); return 0; } diff --git a/example_code/shmem_lock_example.c b/example_code/shmem_lock_example.c index 838e43ecf..ab2a13fa3 100644 --- a/example_code/shmem_lock_example.c +++ b/example_code/shmem_lock_example.c @@ -1,22 +1,19 @@ #include -#include #include -long L = 0; -int main(int argc, char **argv) +int main(void) { - int me, slp; + static long lock = 0; + static int count = 0; shmem_init(); - me = shmem_my_pe(); - slp = 1; - shmem_barrier_all(); - if (me == 1) - sleep (3); - shmem_set_lock(&L); - printf("%d: sleeping %d second%s...\n", me, slp, slp == 1 ? 
"" : "s"); - sleep(slp); - printf("%d: sleeping...done\n", me); - shmem_clear_lock(&L); - shmem_barrier_all(); + int me = shmem_my_pe(); + shmem_set_lock(&lock); + int val = shmem_g(&count, 0); /* get count value on PE 0 */ + printf("%d: count is %d\n", me, val); + val++; /* incrementing and updating count on PE 0 */ + shmem_p(&count, val, 0); + shmem_quiet(); + shmem_clear_lock(&lock); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_mype_example.c b/example_code/shmem_mype_example.c deleted file mode 100644 index 762d56a43..000000000 --- a/example_code/shmem_mype_example.c +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -int main(void) -{ - int me; - - shmem_init(); - me = shmem_my_pe(); - printf("My PE id is: %d\n", me); - - return 0; -} diff --git a/example_code/shmem_npes_example.c b/example_code/shmem_npes_example.c index 788abda7d..6c6812841 100644 --- a/example_code/shmem_npes_example.c +++ b/example_code/shmem_npes_example.c @@ -3,15 +3,10 @@ int main(void) { - int npes; - - shmem_init(); - - npes = shmem_n_pes(); - - if (shmem_my_pe() == 0) { - printf("Number of PEs executing this program is: %d\n", npes); - } - - return 0; + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + printf("I am #%d of %d PEs executing this program\n", me, npes); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_p_example.c b/example_code/shmem_p_example.c index 58be8331b..8ebfd4e61 100644 --- a/example_code/shmem_p_example.c +++ b/example_code/shmem_p_example.c @@ -1,27 +1,19 @@ #include #include #include -static const double e = 2.71828182; -static const double epsilon = 0.00000001; int main(void) { - double *f; - int me; - + const double e = 2.71828182; + const double epsilon = 0.00000001; + static double f = 3.1415927; shmem_init(); - me = shmem_my_pe(); - f = (double *) shmem_malloc(sizeof (*f)); - - *f = 3.1415927; - shmem_barrier_all(); - + int me = shmem_my_pe(); if (me == 0) - shmem_p(f, e, 1); - + shmem_p(&f, e, 
1); shmem_barrier_all(); if (me == 1) - printf("%s\n", (fabs (*f - e) < epsilon) ? "OK" : "FAIL"); - + printf("%s\n", (fabs(f - e) < epsilon) ? "OK" : "FAIL"); + shmem_finalize(); return 0; } diff --git a/example_code/shmem_ptr_example.c b/example_code/shmem_ptr_example.c index 217b26b61..9ac26ca4d 100644 --- a/example_code/shmem_ptr_example.c +++ b/example_code/shmem_ptr_example.c @@ -3,29 +3,21 @@ int main(void) { - static int bigd[100]; - int *ptr; - int i; - + static int dest[4]; shmem_init(); - - if (shmem_my_pe() == 0) { - /* initialize PE 1's bigd array */ - ptr = shmem_ptr(bigd, 1); + int me = shmem_my_pe(); + if (me == 0) { /* initialize PE 1's dest array */ + int* ptr = shmem_ptr(dest, 1); if (ptr == NULL) - printf("can't use pointer to directly access PE 1's array\n"); + printf("can't use pointer to directly access PE 1's dest array\n"); else - for (i=0; i<100; i++) - *ptr++ = i+1; + for (int i = 0; i < 4; i++) + *ptr++ = i + 1; } - shmem_barrier_all(); - - if (shmem_my_pe() == 1) { - printf("bigd on PE 1 is:\n"); - for (i=0; i<100; i++) - printf(" %d\n",bigd[i]); - printf("\n"); - } - return 1; + if (me == 1) + printf("PE 1 dest: %d, %d, %d, %d\n", + dest[0], dest[1], dest[2], dest[3]); + shmem_finalize(); + return 0; } diff --git a/example_code/shmem_put_example.c b/example_code/shmem_put_example.c index a61b56f63..a5f6ffb44 100644 --- a/example_code/shmem_put_example.c +++ b/example_code/shmem_put_example.c @@ -3,16 +3,14 @@ int main(void) { - long source[10] = { 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10 }; + long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; static long dest[10]; shmem_init(); - if (shmem_my_pe() == 0) { - /* put 10 words into dest on PE 1 */ + int me = shmem_my_pe(); + if (me == 0) /* put 10 words into dest on PE 1 */ shmem_put(dest, source, 10, 1); - } - shmem_barrier_all(); /* sync sender and receiver */ - printf("dest[0] on PE %d is %ld\n", shmem_my_pe(), dest[0]); + shmem_barrier_all(); /* sync sender and receiver */ + printf("dest[0] 
on PE %d is %ld\n", me, dest[0]); + shmem_finalize(); return 0; } - diff --git a/example_code/shmem_quiet_example.c b/example_code/shmem_quiet_example.c index fb258a675..a23163eb0 100644 --- a/example_code/shmem_quiet_example.c +++ b/example_code/shmem_quiet_example.c @@ -1,32 +1,27 @@ #include #include -long dest[3] = {0}; -int targ = 0; -long source[3] = {1, 2, 3}; -int src = 90; - int main(void) { - long x[3] = {0}; - int y = 0; - - shmem_init(); - if (shmem_my_pe() == 0) { - shmem_long_put(dest, source, 3, 1); /*put1*/ - shmem_int_put(&targ, &src, 1, 2); /*put2*/ - - shmem_quiet(); - - shmem_long_get(x, dest, 3, 1); /*gets updated value from dest on PE 1 to local array x */ - shmem_int_get(&y, &targ, 1, 2); /*gets updated value from targ on PE 2 to local variable y*/ - printf("x: {%ld,%ld,%ld}\n",x[0],x[1],x[2]); /*x: {1,2,3}*/ - printf("y: %d\n", y); /*y: 90*/ - - shmem_int_put(&targ, &src, 1, 1); /*put3*/ - shmem_int_put(&targ, &src, 1, 2); /*put4*/ - } - shmem_barrier_all(); /* sync sender and receiver */ - return 0; + static long dest[3]; + static long source[3] = { 1, 2, 3 }; + static int targ; + static int src = 90; + long x[3] = { 0 }; + int y = 0; + shmem_init(); + int me = shmem_my_pe(); + if (me == 0) { + shmem_put(dest, source, 3, 1); /* put1 */ + shmem_put(&targ, &src, 1, 2); /* put2 */ + shmem_quiet(); + shmem_get(x, dest, 3, 1); /* gets updated value from dest on PE 1 to local array x */ + shmem_get(&y, &targ, 1, 2); /* gets updated value from targ on PE 2 to local variable y */ + printf("x: { %ld, %ld, %ld }\n", x[0], x[1], x[2]); /* x: { 1, 2, 3 } */ + printf("y: %d\n", y); /* y: 90 */ + shmem_put(&targ, &src, 1, 1); /* put3 */ + shmem_put(&targ, &src, 1, 2); /* put4 */ + } + shmem_finalize(); + return 0; } - diff --git a/example_code/shmem_swap_example.c b/example_code/shmem_swap_example.c deleted file mode 100644 index f47a35cd3..000000000 --- a/example_code/shmem_swap_example.c +++ /dev/null @@ -1,23 +0,0 @@ -#include -#include - -int 
main(void) -{ - long *dest; - int me, npes; - long swapped_val, new_val; - - shmem_init(); - me = shmem_my_pe(); - npes = shmem_n_pes(); - dest = (long *) shmem_malloc(sizeof (*dest)); - *dest = me; - shmem_barrier_all(); - new_val = me; - if (me & 1){ - swapped_val = shmem_swap(dest, new_val, (me + 1) % npes); - printf("%d: dest = %ld, swapped = %ld\n", me, *dest, swapped_val); - } - shmem_free(dest); - return 0; -} diff --git a/example_code/shmem_sync_example.c b/example_code/shmem_sync_example.c new file mode 100644 index 000000000..8c447beee --- /dev/null +++ b/example_code/shmem_sync_example.c @@ -0,0 +1,28 @@ +#include +#include + +int main(void) +{ + static int x = 10101; + static long pSync[SHMEM_BARRIER_SYNC_SIZE]; + + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + + for (int i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i++) + pSync[i] = SHMEM_SYNC_VALUE; + + shmem_sync_all(); + + if (me % 2 == 0) { + /* put to next even PE in a circular fashion */ + shmem_p(&x, 4, (me + 2) % npes); + /* synchronize all even pes */ + shmem_quiet(); + shmem_sync(0, 1, (npes / 2 + npes % 2), pSync); + } + printf("%d: x = %d\n", me, x); + shmem_finalize(); + return 0; +} diff --git a/example_code/shmem_test_example1.c b/example_code/shmem_test_example1.c new file mode 100644 index 000000000..d3f760f44 --- /dev/null +++ b/example_code/shmem_test_example1.c @@ -0,0 +1,30 @@ +#include +#include + +int user_wait_any(long *ivar, int count, int cmp, long value) +{ + int idx = 0; + while (!shmem_test(&ivar[idx], cmp, value)) + idx = (idx + 1) % count; + return idx; +} + +int main(void) +{ + shmem_init(); + const int mype = shmem_my_pe(); + const int npes = shmem_n_pes(); + + long *wait_vars = shmem_calloc(npes, sizeof(long)); + if (mype == 0) + { + int who = user_wait_any(wait_vars, npes, SHMEM_CMP_NE, 0); + printf("PE %d observed first update from PE %d\n", mype, who); + } + else + shmem_p(&wait_vars[mype], mype, 0); + + shmem_free(wait_vars); + shmem_finalize(); + 
return 0; +} diff --git a/example_code/writing_shmem_example.c b/example_code/writing_shmem_example.c index 6ee05d3f0..e04139e09 100644 --- a/example_code/writing_shmem_example.c +++ b/example_code/writing_shmem_example.c @@ -1,32 +1,35 @@ #include #include + #define SIZE 16 -int -main(int argc, char* argv[]) + +int main(void) { - short source[SIZE]; - static short dest[SIZE]; - int i, npes; - shmem_init(); - npes = shmem_n_pes(); - if (shmem_my_pe() == 0) { - /* initialize array */ - for(i = 0; i < SIZE; i++) - source[i] = i; - /* local, not symmetric */ - /* static makes it symmetric */ - /* put "size" words into dest on each PE */ - for(i = 1; i < npes; i++) - shmem_short_put(dest, source, SIZE, i); - } - shmem_barrier_all(); /* sync sender and receiver */ - if (shmem_my_pe() != 0) { - printf("dest on PE %d is \t", shmem_my_pe()); - for(i = 0; i < SIZE; i++) - printf("%hd \t", dest[i]); - printf("\n"); - } - shmem_finalize(); - return 0; + short source[SIZE]; + static short dest[SIZE]; + static long lock = 0; + shmem_init(); + int me = shmem_my_pe(); + int npes = shmem_n_pes(); + if (me == 0) { + /* initialize array */ + for (int i = 0; i < SIZE; i++) + source[i] = i; + /* local, not symmetric */ + /* static makes it symmetric */ + /* put "size" words into dest on each PE */ + for (int i = 1; i < npes; i++) + shmem_put(dest, source, SIZE, i); + } + shmem_barrier_all(); /* sync sender and receiver */ + if (me != 0) { + shmem_set_lock(&lock); + printf("dest on PE %d is \t", me); + for (int i = 0; i < SIZE; i++) + printf("%hd \t", dest[i]); + printf("\n"); + shmem_clear_lock(&lock); + } + shmem_finalize(); + return 0; } - diff --git a/figures/quiet.graffle b/figures/quiet.graffle index d01f3f254..9749d29ee 100644 --- a/figures/quiet.graffle +++ b/figures/quiet.graffle @@ -1,1309 +1,1397 @@ - - - - - ActiveLayerIndex - 0 - ApplicationVersion - - com.omnigroup.OmniGraffle - 139.18.0.187838 - - AutoAdjust - - BackgroundGraphic - - Bounds - {{0, 0}, {1152, 733}} - 
Class - SolidGraphic - ID - 2 - Style - - shadow - - Draws - NO - - stroke - - Draws - NO - - - - BaseZoom - 0 - CanvasOrigin - {0, 0} - ColumnAlign - 1 - ColumnSpacing - 36 - CreationDate - 2014-02-04 18:37:20 +0000 - Creator - Pavel Shamis - DisplayScale - 1 0/72 in = 1.0000 in - GraphDocumentVersion - 8 - GraphicsList - - - Bounds - {{70.096904063309495, 405.61554727051521}, {76.921333312988281, 15}} - Class - ShapedGraphic - ID - 65 - Rotation - 153.57574462890625 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{70.121753001297776, 163.67627997882585}, {77.871635437011719, 15}} - Class - ShapedGraphic - ID - 54 - Rotation - 153.82313537597656 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Class - LineGraphic - Head - - ID - 45 - - ID - 77 - Points - - {16, 264} - {224, 264.21963500976562} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - - - Bounds - {{548.94970253341262, 123}, {198, 125.83084106445312}} - Class - ShapedGraphic - ID - 70 - Shape - Rectangle - Style - - fill - - Color - - b - 0.999991 - g - 0.999974 - r - 1 - - - stroke - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - Text - - 
Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs30 \cf0 PE K is any PE in the system.\ -\ -value1, value2, and value3\ -are delivered to target PEs and visible for PE K after the shmem_quiet() call.} - - - - Bounds - {{45.999997409511252, 263.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 76 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{16, 11}, {70, 39}} - Class - ShapedGraphic - ID - 75 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 2} - VerticalPad - 0 - - - - Bounds - {{45.999997409511252, 62.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 74 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{765.69970444076125, 263.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 62 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{735.69970703125, 11}, {70, 39}} - Class - ShapedGraphic - ID - 61 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} 
-\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE K} - VerticalPad - 0 - - - - Bounds - {{765.69970444076125, 62.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 60 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{432.95552725825144, 360.83631934136469}, {79.163833618164062, 15}} - Class - ShapedGraphic - ID - 57 - Rotation - 29.242952346801758 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Class - LineGraphic - ID - 56 - Points - - {352.099853515625, 264.21963500976562} - {805.69970703125, 263} - - Style - - stroke - - HeadArrow - 0 - Legacy - - Pattern - 24 - TailArrow - 0 - - - Tail - - ID - 45 - Info - 3 - - - - Bounds - {{429.58094966987062, 193.41368125253854}, {77.487899780273438, 15}} - Class - ShapedGraphic - ID - 55 - Rotation - 24.251314163208008 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{520.19970444076125, 263.71961975097656}, {10, 184.49977111816406}} - Class - ShapedGraphic - ID - 53 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 
1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{283.099853515625, 420.54342651367188}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 52 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - - - - Bounds - {{156.54992416732375, 377.10272216796875}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 51 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr5, value5, PE 2)} - - - - Bounds - {{156.54992416732375, 333.66201782226562}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 50 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 - r - 0.830414 - - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr4, value4, PE 0)} - - - - Bounds - {{283.099853515625, 290.557861328125}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 49 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - Style - - fill - - Color - - b - 0.830403 - g - 0.830389 
- r - 0.830414 - - - - - - Bounds - {{156.54992416732375, 172.49285888671875}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 48 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr3, value3, PE 0)} - - - - Bounds - {{156.54992416732375, 136.35284042358398}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 47 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr2, value2, PE 2)} - - - - Bounds - {{224, 250.38165283203125}, {128.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 45 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_quiet()} - - - - Bounds - {{156.54992416732375, 102.67642211914062}, {263.099853515625, 27.67596435546875}} - Class - ShapedGraphic - ID - 36 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - - Shape - Rectangle - Style - - stroke - - CornerRadius - 5 - - - Text - - Text - 
{\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 shmem_int_p (addr1, value1, PE 0)} - - - - Bounds - {{283.099853515625, 210.20545196533203}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 35 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{283.09985092513625, 62.500228881835938}, {10, 27.67596435546875}} - Class - ShapedGraphic - ID - 33 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - Bounds - {{432.56935019526316, 127.00000037163812}, {73.461151123046875, 15}} - Class - ShapedGraphic - ID - 27 - Rotation - 24.960399627685547 - Shape - AdjustableArrow - ShapeData - - ratio - 0.32189163565635681 - width - 14.973806381225586 - - Style - - fill - - Color - - a - 0.1 - b - 0 - g - 0 - r - 0 - - Draws - NO - MiddleFraction - 0.70634919404983521 - - shadow - - Color - - a - 0.4 - b - 0 - g - 0 - r - 0 - - Fuzziness - 0.0 - ShadowVector - {0, 2} - - stroke - - Color - - a - 0.75 - b - 0 - g - 0 - r - 0 - - - - TextRelativeArea - {{0.125, 0.25}, {0.75, 0.5}} - isConnectedShape - - - - Bounds - {{490.19970703125, 11}, {70, 39}} - Class - ShapedGraphic - ID - 30 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 -\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs28 \cf0 PE 1} - VerticalPad - 0 - - - - Bounds - {{253.09985092513625, 11}, {70, 39}} - Class - ShapedGraphic - ID - 28 - Shape - Circle - Style - - Text - - Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 
-\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} -{\colortbl;\red255\green255\blue255;} -\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc - -\f0\fs32 \cf0 PE 0} - VerticalPad - 0 - - - - Bounds - {{520.19970444076125, 62.500225067138672}, {10, 202.21939086914062}} - Class - ShapedGraphic - ID - 1 - Magnets - - {0, 1} - {0, -1} - {1, 0} - {-1, 0} - {1, 1} - {1, -1} - {-1, 1} - {-1, -1} - - Shape - Rectangle - - - GridInfo - - GuidesLocked - NO - GuidesVisible - YES - HPages - 2 - ImageCounter - 1 - KeepToScale - - Layers - - - Lock - NO - Name - Layer 1 - Print - YES - View - YES - - - LayoutInfo - - Animate - NO - circoMinDist - 18 - circoSeparation - 0.0 - layoutEngine - dot - neatoSeparation - 0.0 - twopiSeparation - 0.0 - - LinksVisible - NO - MagnetsVisible - NO - MasterSheets - - ModificationDate - 2014-02-21 22:17:58 +0000 - Modifier - Shamis, Pavel - NotesVisible - NO - Orientation - 2 - OriginVisible - NO - PageBreaks - YES - PrintInfo - - NSBottomMargin - - float - 41 - - NSHorizonalPagination - - coded - BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG - - NSLeftMargin - - float - 18 - - NSPaperSize - - size - {612, 792} - - NSPrintReverseOrientation - - int - 0 - - NSRightMargin - - float - 18 - - NSTopMargin - - float - 18 - - - PrintOnePage - - ReadOnly - NO - RowAlign - 1 - RowSpacing - 36 - SheetTitle - Canvas 1 - SmartAlignmentGuidesActive - YES - SmartDistanceGuidesActive - YES - UniqueID - 1 - UseEntirePage - - VPages - 1 - WindowInfo - - CurrentSheet - 0 - ExpandedCanvases - - - name - Canvas 1 - - - Frame - {{108, 206}, {1434, 902}} - ListView - - OutlineWidth - 142 - RightSidebar - - Sidebar - - SidebarWidth - 120 - VisibleRegion - {{-81, -37}, {1314, 808}} - Zoom - 1 - ZoomValues - - - Canvas 1 - 1 - 1 - - - - - + + + + + ApplicationVersion + + com.omnigroup.OmniGraffle7 + 179.14.0.293243 + + CreationDate + 2014-02-04 18:37:20 +0000 + 
Creator + Pavel Shamis + GraphDocumentVersion + 14 + GuidesLocked + NO + GuidesVisible + YES + ImageCounter + 1 + LinksVisible + NO + MagnetsVisible + NO + MasterSheets + + ModificationDate + 2014-02-21 22:17:58 +0000 + Modifier + Shamis, Pavel + MovementHandleVisible + NO + NotesVisible + NO + OriginVisible + NO + PageBreaks + YES + PrintInfo + + NSBottomMargin + + float + 41 + + NSHorizonalPagination + + coded + BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG + + NSLeftMargin + + float + 18 + + NSPaperSize + + size + {611.99999332427979, 792} + + NSPrintReverseOrientation + + coded + BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG + + NSRightMargin + + float + 18 + + NSTopMargin + + float + 18 + + + ReadOnly + NO + Sheets + + + ActiveLayerIndex + 0 + AutoAdjust + 6 + AutosizingMargin + 72 + BackgroundGraphic + + Bounds + {{0, 0}, {1152, 733}} + Class + GraffleShapes.CanvasBackgroundGraphic + ID + 2 + Style + + shadow + + Draws + NO + + stroke + + Draws + NO + + + + BaseZoom + 0 + CanvasDimensionsOrigin + {0, 0} + CanvasOrigin + {0, 0} + CanvasSizingMode + 1 + ColumnAlign + 1 + ColumnSpacing + 36 + DisplayScale + 1 in = 1.00000 in + GraphicsList + + + Bounds + {{70.096904063309495, 405.61554727051521}, {76.921333312988281, 15}} + Class + ShapedGraphic + ID + 3 + Rotation + 153.57574462890625 + Shape + AdjustableArrow + ShapeData + + ratio + 0.32189163565635681 + width + 14.973806381225586 + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + MiddleFraction + 0.70634919404983521 + + shadow + + Color + + a + 0.40000000000000002 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Fuzziness + 0.0 + + stroke + + Color + + a + 0.75 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + + + TextRelativeArea + {{0.125, 0.25}, {0.75, 0.5}} + isConnectedShape + + + + Bounds + {{70.121753001297776, 163.67627997882585}, 
{77.871635437011719, 15}} + Class + ShapedGraphic + ID + 4 + Rotation + 153.82313537597656 + Shape + AdjustableArrow + ShapeData + + ratio + 0.32189163565635681 + width + 14.973806381225586 + + Style + + fill + + Color + + a + 0.10000000000000001 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Draws + NO + MiddleFraction + 0.70634919404983521 + + shadow + + Color + + a + 0.40000000000000002 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Fuzziness + 0.0 + + stroke + + Color + + a + 0.75 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + + + TextRelativeArea + {{0.125, 0.25}, {0.75, 0.5}} + isConnectedShape + + + + Class + LineGraphic + Head + + ID + 23 + + ID + 5 + Points + + {16, 264} + {224, 264.21963500976562} + + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + HeadArrow + 0 + Legacy + + Pattern + 24 + TailArrow + 0 + + + + + Bounds + {{548.94970253341262, 123}, {198, 125.83084106445312}} + Class + ShapedGraphic + ID + 6 + Style + + fill + + Color + + b + 0.99999970197677612 + g + 0.99999970197677612 + r + 0.9999966025352478 + space + srgb + + + stroke + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs30 \cf0 PE K is any PE in the system.\ +\ +value1, value2, and value3\ +are delivered to target PEs and visible for PE K after the shmem_quiet() call.} + + + + Bounds + {{45.999997409511252, 263.71961975097656}, {10, 184.49977111816406}} + Class + ShapedGraphic + ID + 7 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + 
srgb + + + + + + Bounds + {{16, 11}, {70, 39}} + Class + ShapedGraphic + ID + 8 + Shape + Circle + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 PE 2} + VerticalPad + 0.0 + + + + Bounds + {{45.999997409511252, 62.500225067138672}, {10, 202.21939086914062}} + Class + ShapedGraphic + ID + 9 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + + + Bounds + {{765.69970444076125, 263.71961975097656}, {10, 184.49977111816406}} + Class + ShapedGraphic + ID + 10 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + + + + + Bounds + {{735.69970703125, 11}, {70, 39}} + Class + ShapedGraphic + ID + 11 + Shape + Circle + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 PE K} + VerticalPad + 0.0 + + + + Bounds + {{765.69970444076125, 62.500225067138672}, {10, 202.21939086914062}} + Class + ShapedGraphic + ID + 12 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + + + Bounds + {{432.95552725825144, 360.83631934136469}, {79.163833618164062, 15}} + Class + ShapedGraphic + ID + 13 + Rotation + 29.242952346801758 + Shape + AdjustableArrow + ShapeData + + ratio + 0.32189163565635681 + width + 14.973806381225586 + + Style + + fill + + Color + + a + 0.10000000000000001 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb 
+ + MiddleFraction + 0.70634919404983521 + + shadow + + Color + + a + 0.40000000000000002 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Fuzziness + 0.0 + + stroke + + Color + + a + 0.75 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + + + TextRelativeArea + {{0.125, 0.25}, {0.75, 0.5}} + isConnectedShape + + + + Class + LineGraphic + ID + 14 + Points + + {352.099853515625, 264.21963500976562} + {805.69970703125, 263} + + Style + + fill + + Draws + NO + + shadow + + Draws + NO + + stroke + + HeadArrow + 0 + Legacy + + Pattern + 24 + TailArrow + 0 + + + Tail + + ID + 23 + Info + 3 + + + + Bounds + {{429.58094966987062, 193.41368125253854}, {77.487899780273438, 15}} + Class + ShapedGraphic + ID + 15 + Rotation + 24.251314163208008 + Shape + AdjustableArrow + ShapeData + + ratio + 0.32189163565635681 + width + 14.973806381225586 + + Style + + fill + + Color + + a + 0.10000000000000001 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Draws + NO + MiddleFraction + 0.70634919404983521 + + shadow + + Color + + a + 0.40000000000000002 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Fuzziness + 0.0 + + stroke + + Color + + a + 0.75 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + + + TextRelativeArea + {{0.125, 0.25}, {0.75, 0.5}} + isConnectedShape + + + + Bounds + {{520.19970444076125, 263.71961975097656}, {10, 184.49977111816406}} + Class + ShapedGraphic + ID + 16 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + + + + + Bounds + {{283.099853515625, 420.54342651367188}, {10, 27.67596435546875}} + Class + ShapedGraphic + ID + 17 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + + + + + Bounds + {{156.54992416732375, 377.10272216796875}, 
{263.099853515625, 27.67596435546875}} + Class + ShapedGraphic + ID + 18 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + + stroke + + CornerRadius + 5 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 shmem_int_p (addr5, value5, PE 2)} + + + + Bounds + {{156.54992416732375, 333.66201782226562}, {263.099853515625, 27.67596435546875}} + Class + ShapedGraphic + ID + 19 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + + stroke + + CornerRadius + 5 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 shmem_int_p (addr4, value4, PE 1)} + + + + Bounds + {{283.099853515625, 290.557861328125}, {10, 27.67596435546875}} + Class + ShapedGraphic + ID + 20 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + Style + + fill + + Color + + b + 0.86274486780166626 + g + 0.86274498701095581 + r + 0.86274510622024536 + space + srgb + + + + + + Bounds + {{156.54992416732375, 172.49285888671875}, {263.099853515625, 27.67596435546875}} + Class + ShapedGraphic + ID + 21 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Style + + stroke + + CornerRadius + 5 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 
+{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 shmem_int_p (addr3, value3, PE 1)} + + + + Bounds + {{156.54992416732375, 136.35284042358398}, {263.099853515625, 27.67596435546875}} + Class + ShapedGraphic + ID + 22 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Style + + stroke + + CornerRadius + 5 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 shmem_int_p (addr2, value2, PE 2)} + + + + Bounds + {{224, 250.38165283203125}, {128.099853515625, 27.67596435546875}} + Class + ShapedGraphic + ID + 23 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Style + + stroke + + CornerRadius + 5 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 shmem_quiet()} + + + + Bounds + {{156.54992416732375, 102.67642211914062}, {263.099853515625, 27.67596435546875}} + Class + ShapedGraphic + ID + 24 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + + Style + + stroke + + CornerRadius + 5 + + + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 shmem_int_p (addr1, value1, PE 1)} + 
+ + + Bounds + {{283.099853515625, 210.20545196533203}, {10, 27.67596435546875}} + Class + ShapedGraphic + ID + 25 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + + + Bounds + {{283.09985092513625, 62.500228881835938}, {10, 27.67596435546875}} + Class + ShapedGraphic + ID + 26 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + + + Bounds + {{432.56935019526316, 127.00000037163812}, {73.461151123046875, 15}} + Class + ShapedGraphic + ID + 27 + Rotation + 24.960399627685547 + Shape + AdjustableArrow + ShapeData + + ratio + 0.32189163565635681 + width + 14.973806381225586 + + Style + + fill + + Color + + a + 0.10000000000000001 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Draws + NO + MiddleFraction + 0.70634919404983521 + + shadow + + Color + + a + 0.40000000000000002 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + Fuzziness + 0.0 + + stroke + + Color + + a + 0.75 + b + 0.0 + g + 0.0 + r + 0.0 + space + srgb + + + + TextRelativeArea + {{0.125, 0.25}, {0.75, 0.5}} + isConnectedShape + + + + Bounds + {{490.19970703125, 11}, {70, 39}} + Class + ShapedGraphic + ID + 28 + Shape + Circle + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs28 \cf0 PE 1} + VerticalPad + 0.0 + + + + Bounds + {{253.09985092513625, 11}, {70, 39}} + Class + ShapedGraphic + ID + 29 + Shape + Circle + Text + + Text + {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf830 +{\fonttbl\f0\fswiss\fcharset0 Helvetica;} +{\colortbl;\red255\green255\blue255;} +{\*\expandedcolortbl;;} +\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc\partightenfactor0 + +\f0\fs32 \cf0 PE 0} + VerticalPad + 0.0 + + + + 
Bounds + {{520.19970444076125, 62.500225067138672}, {10, 202.21939086914062}} + Class + ShapedGraphic + ID + 30 + Magnets + + {0, 1} + {0, -1} + {1, 0} + {-1, 0} + {1, 1} + {1, -1} + {-1, 1} + {-1, -1} + + + + GridInfo + + HPages + 2 + KeepToScale + + Layers + + + Artboards + + Lock + + Name + Layer 1 + Print + + View + + + + LayoutInfo + + Animate + NO + circoMinDist + 18 + circoSeparation + 0.0 + layoutEngine + dot + neatoLineLength + 0.20000000298023224 + neatoSeparation + 0.0 + twopiSeparation + 0.0 + + Orientation + 2 + PrintOnePage + + RowAlign + 1 + RowSpacing + 36 + SheetTitle + Canvas 1 + UniqueID + 1 + VPages + 1 + VisibleVoidKey + 1 + + + SmartAlignmentGuidesActive + YES + SmartDistanceGuidesActive + YES + UseEntirePage + + WindowInfo + + CurrentSheet + 0 + Frame + {{0, 0}, {1276, 777}} + ShowInfo + + Sidebar + + SidebarWidth + 200 + Sidebar_Tab + 0 + VisibleRegion + {{0, -127}, {774, 667}} + ZoomValues + + + Canvas 1 + 1 + 1 + + + + compressOnDiskKey + + copyLinkedImagesKey + + createSinglePDFKey + + exportAreaKey + 0 + exportQualityKey + 100 + exportSizesKey + + 1 + + + fileFormatKey + 0 + htmlImageTypeKey + 0 + includeBackgroundGraphicKey + + includeNonPrintingLayersKey + + lastExportTypeKey + 8 + marginWidthKey + 0.0 + previewTypeKey + 0 + readOnlyKey + + resolutionForBMPKey + 1 + resolutionForGIFKey + 1 + resolutionForHTMLKey + 1 + resolutionForJPGKey + 1 + resolutionForPNGKey + 1 + resolutionForTIFFKey + 1 + resolutionUnitsKey + 0 + saveAsFlatFileOptionKey + 3 + useArtboardsKey + + useMarginKey + + useNotesKey + + + diff --git a/figures/quiet.pdf b/figures/quiet.pdf index 3f674f238..fed7f1d52 100644 Binary files a/figures/quiet.pdf and b/figures/quiet.pdf differ diff --git a/figures/wait.graffle b/figures/wait.graffle index ba0b04fdd..8a05b6cf9 100644 --- a/figures/wait.graffle +++ b/figures/wait.graffle @@ -6,7 +6,7 @@ 0 ApplicationVersion - com.omnigroup.OmniGraffle + com.omnigroup.OmniGrafflePro 139.18.0.187838 AutoAdjust @@ -90,7 +90,7 @@ Text 
Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 + {\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf110 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -103,7 +103,7 @@ Bounds - {{576, 66}, {201, 58.824271203326333}} + {{576, 66}, {221, 58.824271203326333}} Class ShapedGraphic ID @@ -140,12 +140,12 @@ Text Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 + {\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf110 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc -\f0\fs30 \cf0 shmem_wait is a blocking operation therefore it waits until value in +\f0\fs30 \cf0 shmem_wait_until is a blocking operation therefore it waits until value in \i addr \i0 is updated} @@ -189,7 +189,7 @@ Text Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 + {\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf110 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;\f1\froman\fcharset0 Times-Roman;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -227,7 +227,7 @@ Text Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 + {\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf110 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -352,7 +352,7 @@ Text Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 + {\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf110 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} 
\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -450,7 +450,7 @@ is Text Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 + {\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf110 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -495,7 +495,7 @@ is Text Text - {\rtf1\ansi\ansicpg1252\cocoartf1187\cocoasubrtf400 + {\rtf1\ansi\ansicpg1252\cocoartf1404\cocoasubrtf110 \cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;} {\colortbl;\red255\green255\blue255;} \pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc @@ -574,9 +574,9 @@ is MasterSheets ModificationDate - 2014-02-10 21:26:55 +0000 + 2017-10-12 23:18:46 +0000 Modifier - Shamis, Pavel + Sreeram Potluri NotesVisible NO Orientation @@ -655,7 +655,7 @@ is Frame - {{398, 136}, {1067, 872}} + {{157, 4}, {1067, 872}} ListView OutlineWidth @@ -669,7 +669,7 @@ is SidebarWidth 120 VisibleRegion - {{0, 0}, {932, 733}} + {{0, 0}, {918, 715}} Zoom 1 ZoomValues diff --git a/figures/wait.pdf b/figures/wait.pdf index 6ece7fddc..b48fa8b7b 100644 Binary files a/figures/wait.pdf and b/figures/wait.pdf differ diff --git a/main_spec.tex b/main_spec.tex index 60026e7a9..6c2c46596 100644 --- a/main_spec.tex +++ b/main_spec.tex @@ -4,15 +4,13 @@ \input{utils/defs} -\makeindex - \begin{document} \input{content/frontmatter} -\section{The \openshmem Effort}\label{subsec:openshmem_effort} +\section{The OpenSHMEM Effort}\label{subsec:openshmem_effort} \input{content/the_openshmem_effort} \section{Programming Model Overview}\label{subsec:programming_model} @@ -30,6 +28,9 @@ \section{Language Bindings and Conformance}\label{subsec:bindings} \section{Library Constants}\label{subsec:library_constants} \input{content/library_constants} +\section{Library 
Handles}\label{subsec:library_handles} +\input{content/library_handles} + \section{Environment Variables }\label{subsec:environment_variables} \input{content/environment_variables} @@ -44,7 +45,7 @@ \section{OpenSHMEM Library API}\label{sec:openshmem_library_api} \subsection{Library Setup, Exit, and Query Routines} The library setup and query interfaces that initialize and monitor the parallel -environment of the \ac{PE}s. +environment of the \acp{PE}. \subsubsection{\textbf{SHMEM\_INIT}}\label{subsec:shmem_init} \input{content/shmem_init} @@ -79,30 +80,68 @@ \subsubsection{\textbf{SHMEM\_INFO\_GET\_NAME}}\label{subsec:shmem_info_get_name \subsubsection{\textbf{START\_PES}}\label{subsec:start_pes} \input{content/start_pes} +\subsection{Thread Support} +\label{subsec:thread_support} +\input{content/threads_intro.tex} +\subsubsection{\textbf{SHMEM\_INIT\_THREAD}} +\label{subsec:shmem_init_thread} +\input{content/shmem_init_thread} +\subsubsection{\textbf{SHMEM\_QUERY\_THREAD}} +\label{subsec:shmem_query_thread} +\input{content/shmem_query_thread} \subsection{Memory Management Routines} -\openshmem provides a set of \ac{API}s for managing the symmetric heap. The -\ac{API}s allow one to dynamically allocate, deallocate, reallocate and align -symmetric data objects in the symmetric heap, in \Clang{} and \Fortran. +\label{sec:memory_management} + +\openshmem provides a set of \acp{API} for managing the symmetric heap. The +\acp{API} allow one to dynamically allocate, deallocate, reallocate and align +symmetric data objects in the symmetric heap. 
\subsubsection{\textbf{SHMEM\_MALLOC, SHMEM\_FREE, SHMEM\_REALLOC, SHMEM\_ALIGN}}\label{subsec:shfree} \input{content/shmem_malloc.tex} +\subsubsection{\textbf{SHMEM\_CALLOC}}\label{subsec:shmem_calloc} +\input{content/shmem_calloc.tex} + \subsubsection{\textbf{SHPALLOC}}\label{subsec:shpalloc} \input{content/shpalloc.tex} \subsubsection{\textbf{SHPCLMOVE}}\label{subsec:shpclmove} \input{content/shpclmove.tex} -\subsubsection{\textbf{SHPDEALLOC}}\label{subsec:shpdealloc} -\input{content/shpdealloc.tex} +\subsubsection{\textbf{SHPDEALLC}}\label{subsec:shpdeallc} +\input{content/shpdeallc.tex} + +\subsection{Communication Management Routines} +\label{sec:ctx} +All \openshmem RMA, AMO, and memory ordering routines are +performed on a communication context. The communication context defines an +independent ordering and completion environment, allowing users to manage the +overlap of communication with computation and also to manage communication +operations performed by separate threads within a multithreaded \ac{PE}. For +example, in single-threaded environments, contexts may be used to pipeline +communication and computation. In multithreaded environments, contexts may +additionally provide thread isolation, eliminating overheads resulting from +thread interference. +Context handles are of type \CTYPE{shmem\_ctx\_t} and are valid for +language-level assignment and equality comparison. A handle to the desired context is +passed as an argument in the \Cstd \CTYPE{shmem\_ctx\_*} and type-generic API +routines. API routines that do not accept a context argument operate on the +default context. The default context can be used explicitly through the +\LibHandleRef{SHMEM\_CTX\_DEFAULT} handle. 
+\subsubsection{\textbf{SHMEM\_CTX\_CREATE}} +\label{subsec:shmem_ctx_create} +\input{content/shmem_ctx_create.tex} +\subsubsection{\textbf{SHMEM\_CTX\_DESTROY}} +\label{subsec:shmem_ctx_destroy} +\input{content/shmem_ctx_destroy.tex} \subsection{Remote Memory Access Routines}\label{sec:rma} @@ -139,29 +178,61 @@ \subsubsection{\textbf{SHMEM\_GET\_NBI}}\label{subsec:shmem_get_nbi} \subsection{Atomic Memory Operations}\label{sec:amo} \input{content/atomics_intro} -\subsubsection{\textbf{SHMEM\_ADD}}\label{subsec:shmem_add} -\input{content/shmem_add.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH}} +\label{subsec:shmem_atomic_fetch} +\input{content/shmem_atomic_fetch.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_SET}} +\label{subsec:shmem_atomic_set} +\input{content/shmem_atomic_set.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_COMPARE\_SWAP}} +\label{subsec:shmem_atomic_compare_swap} +\input{content/shmem_atomic_compare_swap.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_SWAP}} +\label{subsec:shmem_atomic_swap} +\input{content/shmem_atomic_swap.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_INC}} +\label{subsec:shmem_atomic_fetch_inc} +\input{content/shmem_atomic_fetch_inc.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_INC}} +\label{subsec:shmem_atomic_inc} +\input{content/shmem_atomic_inc.tex} + +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_ADD}} +\label{subsec:shmem_atomic_fetch_add} +\input{content/shmem_atomic_fetch_add.tex} -\subsubsection{\textbf{SHMEM\_CSWAP}}\label{subsec:shmem_cswap} -\input{content/shmem_cswap.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_ADD}} +\label{subsec:shmem_atomic_add} +\input{content/shmem_atomic_add.tex} -\subsubsection{\textbf{SHMEM\_SWAP}}\label{subsec:shmem_swap} -\input{content/shmem_swap.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_AND}} +\label{subsec:shmem_atomic_fetch_and} +\input{content/shmem_atomic_fetch_and.tex} -\subsubsection{\textbf{SHMEM\_FINC}}\label{subsec:shmem_finc} -\input{content/shmem_finc.tex} 
+\subsubsection{\textbf{SHMEM\_ATOMIC\_AND}} +\label{subsec:shmem_atomic_and} +\input{content/shmem_atomic_and.tex} -\subsubsection{\textbf{SHMEM\_INC}}\label{subsec:shmem_inc} -\input{content/shmem_inc.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_OR}} +\label{subsec:shmem_atomic_fetch_or} +\input{content/shmem_atomic_fetch_or.tex} -\subsubsection{\textbf{SHMEM\_FADD}}\label{subsec:shmem_fadd} -\input{content/shmem_fadd.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_OR}} +\label{subsec:shmem_atomic_or} +\input{content/shmem_atomic_or.tex} -\subsubsection{\textbf{SHMEM\_FETCH}}\label{subsec:shmem_fetch} -\input{content/shmem_fetch.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_FETCH\_XOR}} +\label{subsec:shmem_atomic_fetch_xor} +\input{content/shmem_atomic_fetch_xor.tex} -\subsubsection{\textbf{SHMEM\_SET}}\label{subsec:shmem_set} -\input{content/shmem_set.tex} +\subsubsection{\textbf{SHMEM\_ATOMIC\_XOR}} +\label{subsec:shmem_atomic_xor} +\input{content/shmem_atomic_xor.tex} @@ -176,6 +247,12 @@ \subsubsection{\textbf{SHMEM\_BARRIER\_ALL}}\label{subsec:shmem_barrier_all} \subsubsection{\textbf{SHMEM\_BARRIER}}\label{subsec:shmem_barrier} \input{content/shmem_barrier.tex} +\subsubsection{\textbf{SHMEM\_SYNC\_ALL}}\label{subsec:shmem_sync_all} +\input{content/shmem_sync_all.tex} + +\subsubsection{\textbf{SHMEM\_SYNC}}\label{subsec:shmem_sync} +\input{content/shmem_sync.tex} + \subsubsection{\textbf{SHMEM\_BROADCAST}}\label{subsec:shmem_broadcast} \input{content/shmem_broadcast.tex} @@ -195,22 +272,23 @@ \subsubsection{\textbf{SHMEM\_ALLTOALLS}}\label{subsec:shmem_alltoalls} -\subsection{Point-To-Point Synchronization Routines} -The following section discusses \openshmem \ac{API}s that provides a mechanism -for synchronization between two \ac{PE}s based on the value of a symmetric data -object. 
+\subsection{Point-To-Point Synchronization Routines}\label{subsec:p2p_intro} +\input{content/p2p_sync_intro.tex} + +\subsubsection{\textbf{SHMEM\_WAIT\_UNTIL}}\label{subsec:shmem_wait_until} +\input{content/shmem_wait_until.tex} -\subsubsection{\textbf{SHMEM\_WAIT}}\label{subsec:shmem_wait} -\input{content/shmem_wait.tex} +\subsubsection{\textbf{SHMEM\_TEST}}\label{subsec:shmem_test} +\input{content/shmem_test.tex} \subsection{Memory Ordering Routines}\label{subsec:memory_order} -The following section discusses \openshmem \ac{API}s that provide mechanisms to -ensure ordering and/or delivery of \OPR{Put}, \ac{AMO}, and memory store -routines to symmetric data objects. +The following section discusses \openshmem \acp{API} that provide mechanisms to +ensure ordering and/or delivery of \OPR{Put}, \ac{AMO}, memory store, +and non-blocking \PUT{} and \GET{} routines to symmetric data objects. \subsubsection{\textbf{SHMEM\_FENCE}}\label{subsec:shmem_fence} \input{content/shmem_fence.tex} @@ -218,7 +296,7 @@ \subsubsection{\textbf{SHMEM\_FENCE}}\label{subsec:shmem_fence} \subsubsection{\textbf{SHMEM\_QUIET}}\label{subsec:shmem_quiet} \input{content/shmem_quiet.tex} -\subsubsection{Synchronization and Communication Ordering in \openshmem} +\subsubsection{Synchronization and Communication Ordering in OpenSHMEM} \input{content/synchronization_model.tex} @@ -247,16 +325,13 @@ \subsection{Cache Management} \subsubsection{\textbf{SHMEM\_CACHE}}\label{subsec:shmem_cache} \input{content/shmem_cache.tex} - - - - \clearpage - - - \input{content/backmatter} +\clearpage +\phantomsection +\addcontentsline{toc}{chapter}{Index} +\printindex \end{document} diff --git a/makefile b/makefile deleted file mode 100644 index b19f46981..000000000 --- a/makefile +++ /dev/null @@ -1,38 +0,0 @@ -TARGET=main_spec - -# make pdf by default -all: ${TARGET}.pdf - -# it doesn't really need the .dvi, but this way all the refs are right -%.pdf : %.dvi - pdflatex $* - -${TARGET}.bbl: ${TARGET}.bib -# in 
case we don't already have a .aux file listing citations -# this should probably be a separate makefile target/dependency instead -# of doing it every time... but *shrug* - pdflatex ${TARGET}.tex -# get the citations out of the bibliography - bibtex ${TARGET} -# do it again in case there are out-of-order cross-references - @pdflatex ${TARGET}.tex - -#${TARGET}.dvi: ${TARGET}.bbl ${TARGET}.tex -${TARGET}.dvi: ${TARGET}.tex - @pdflatex ${TARGET}.tex - -# shortcut, so we can say "make ps" -ps: ${TARGET}.ps - -${TARGET}.ps: ${TARGET}.dvi - @dvips -t a4 ${TARGET}.dvi - -clean: - rm -f ${TARGET}.{log,aux,ps,dvi,bbl,blg,log,idx,out,toc} chappage.txt - -veryclean: clean - rm -f ${TARGET}.{ps,pdf,out} - - -PHONY : ps all clean reallyclean - diff --git a/utils/defs.tex b/utils/defs.tex index 6fb14c151..e1602b03e 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -1,3 +1,7 @@ +\makeindex + +\newcommand{\minitab}[2][l]{\begin{tabular}{@{}#1@{}}#2\end{tabular}} + \definecolor{ListingBG}{rgb}{0.91,0.91,0.91} \definecolor{shadecolor}{rgb}{0.92,0.92,0.92} @@ -29,40 +33,61 @@ \newcommand{\newtext}[1]{\textcolor{ForestGreen}{#1}} \newcommand{\oldtext}[1]{\textcolor{magenta}{\sout{#1}}} -\newcommand{\insertDocVersion}{1.3} -\newcommand{\OSH}{\emph{OpenSHMEM}} -\newcommand{\openshmem}{{Open\-SHMEM}\xspace} +\newcommand{\insertDocVersion}{1.4} +\newcommand{\openshmem}[1][]{% + {Open\-SHMEM\ifthenelse{\equal{#1}{}}{}{~#1}}\xspace} +\newcommand{\HEADER}[1]{\textit{#1}} \newcommand{\FUNC}[1]{\textit{#1}} +\newcommand{\CTYPE}[1]{\textit{#1}} \newcommand{\VAR}[1]{\textit{#1}} \newcommand{\CONST}[1]{\textit{#1}} -\newcommand{\const}[1]{\protect\gb\protect{\textsf{\small #1}}\index{CONST:#1}} +%% \newcommand{\CorCpp}{\textit{C/C++}\xspace} \newcommand{\CorCppFor}{\textit{C/C++/Fortran}\xspace} -\newcommand{\Fortran}{\textit{Fortran}} -\newcommand{\Clang}{\textit{C}} -\newcommand{\Cpp}{\textit{C++}} -\newcommand{\Celev}{\textit{C11}} +\newcommand{\Fortran}[1][]{% + 
\textit{Fortran\ifthenelse{\equal{#1}{}}{}{~#1}}\xspace} +\newcommand{\Cstd}[1][]{% + \textit{C\ifthenelse{\equal{#1}{}}{}{#1}}\xspace} +\newcommand{\Cpp}[1][]{% + \textit{C++\ifthenelse{\equal{#1}{}}{}{#1}}\xspace} +%% \newcommand{\TYPE}{\emph{TYPE}} \newcommand{\TYPENAME}{\emph{TYPENAME}} \newcommand{\SIZE}{\emph{SIZE}} \newcommand{\source}{\textit{source}} -\newcommand{\target}{\textit{target}} +\newcommand{\dest}{\textit{dest}} \newcommand{\PUT}{\textit{Put}} \newcommand{\GET}{\textit{Get}} \newcommand{\OPR}[1]{\textit{#1}} -\newcommand{\dest}{\textit{dest}} -\newcommand{\barrier}{\FUNC{SHMEM\_BARRIER}\xspace} % why here an not others? -\newcommand{\barrierall}{\FUNC{SHMEM\_BARRIER\_ALL}\xspace} % why here an not others? -\newcommand{\broadcast}{\FUNC{SHMEM\_BROADCAST}} -\newcommand{\collect}{\FUNC{SHMEM\_COLLECT}} -\newcommand{\fcollect}{\FUNC{SHMEM\_FCOLLECT}} -\newcommand{\reduction}{\textit{Reduction Operations}} -\newcommand{\alltoall}{\FUNC{SHMEM\_ALLTOALL}} -\newcommand{\alltoalls}{\FUNC{SHMEM\_ALLTOALLS}} -\newcommand{\activeset}{\textit{Active~set}\xspace} % why here and not others? \newcommand{\shmemprefix}{\textit{SHMEM\_}} +\newcommand{\shmemprefixLC}{\textit{shmem\_}} \newcommand{\shmemprefixC}{\textit{\_SHMEM\_}} +\newcommand{\ith}{${\textit{i}^{\text{\tiny th}}}$} +\newcommand{\jth}{${\textit{j}^{\text{\tiny th}}}$} +\newcommand{\kth}{${\textit{k}^{\text{\tiny th}}}$} +\newcommand{\lth}{${\textit{l}^{\text{\tiny th}}}$} + +%% Generate indexed reference. +\newcommand{\EnvVarIndex}[1]{\index{#1}} +\newcommand{\FuncIndex}[1]{\index{#1}} +\newcommand{\LibConstIndex}[1]{\index{#1}} +\newcommand{\LibHandleIndex}[1]{\index{#1}} +\newcommand{\TableIndex}[1]{\index{#1}\index{Tables!#1}} +%% Write text and generate reference. 
+\newcommand{\EnvVarRef}[1]{\VAR{#1}\EnvVarIndex{#1}} +\newcommand{\FuncRef}[1]{\FUNC{#1}\FuncIndex{#1}} +\newcommand{\LibConstRef}[1]{\CONST{#1}\LibConstIndex{#1}} +\newcommand{\LibHandleRef}[1]{\CONST{#1}\LibHandleIndex{#1}} +\newcommand{\TableCaptionRef}[1]{\caption{#1}\TableIndex{#1}} +%% Specialized declaration/creation and generate reference. +\newcommand{\EnvVarDecl}[1]{\EnvVarRef{#1}} +\newcommand{\FuncDecl}[1]{{\ListingsCurrentStyle{#1}}\FuncIndex{#1}} +\newcommand{\FuncParam}[1]{<{\ListingsKeywordStyle{#1}}>} +\newcommand{\LibConstDecl}[2][\CorCppFor]{% + \parbox[t]{0pt}{~\\[-4pt] #1: \\\hspace*{8mm} \LibConstRef{#2} \\~}} +\newcommand{\LibHandleDecl}[2][\CorCppFor]{% + \parbox[t]{0pt}{~\\[-4pt] #1: \\\hspace*{8mm} \LibHandleRef{#2} \\~}} \begin{acronym} \acro{RMA}{\emph{Remote Memory Access}} @@ -74,16 +99,35 @@ \acro{API}{\emph{Application Programming Interface}} \acro{MPI}{\emph{Message Passing Interface}} \acro{SPMD}{\emph{Single Program Multiple Data}} +\acro{ARL}{Army Research Laboratory} +\acro{AMD}{Advanced Micro Devices} +\acro{MPMD}{\emph{Multiple Program Multiple Data}} +\acro{TCP}{\emph{Transmission Control Protocol}} \acro{UH}{University of Houston} \acro{UO}{University of Oregon} \acro{ORNL}{Oak Ridge National Laboratory} \acro{LANL}{Los Alamos National Laboratory} \acro{ESSC}{Extreme Scale Systems Center} -\acro{OSSS}{Open Software System Solutions} +\acro{OSSS}{Open Source Software Solutions} +\acro{SGI}{Silicon Graphics International} \acro{DoD}{U.S. Department of Defense} +\acro{SBU}{Stony Brook University} +\acro{UTK}{University of Tennessee at Knoxville} +\acro{HPE}{Hewlett Packard Enterprise} \end{acronym} +% Grab current listings style for use in environment escape to LaTeX. 
+% https://tex.stackexchange.com/a/209644 +\makeatletter +\newcommand\ListingsCurrentStyle{} +\lst@AddToHook{Output}{\global\let\ListingsCurrentStyle\lst@thestyle} +\lst@AddToHook{OutputOther}{\global\let\ListingsCurrentStyle\lst@thestyle} +\newcommand\ListingsKeywordStyle{} +\lst@AddToHook{Output}{\global\let\ListingsKeywordStyle\lst@keywordstyle} +\lst@AddToHook{OutputOther}{\global\let\ListingsKeywordStyle\lst@keywordstyle} +\makeatother + % % This is used to put line numbers on plain pages. Used in draft.tex % @@ -138,6 +182,7 @@ % % Use Sans Serif font for sections, etc. % +% \makeatletter \def\section{\@startsection {section}{1}{\z@}{-3.5ex plus -1ex minus -.2ex}{2.3ex plus .2ex}{\Large\sf}} @@ -145,8 +190,8 @@ -.2ex}{1.5ex plus .2ex}{\large\sf}} \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-3.25ex plus -1ex minus -.2ex}{1.5ex plus .2ex}{\normalsize\sf\bf}} -\def\paragraph{\@startsection {paragraph}{4}{\z@}{3.25ex plus 1ex -minus .2ex}{-1em}{\normalsize\sf}} +\def\paragraph{\@startsection {paragraph}{4}{\z@}{3.25ex plus 1ex minus .2ex} +{-1em}{\normalsize\sf\bf}} % Indent after \paragraph \makeatother % % End use Sans Serif font for sections, etc. S. 
Otto @@ -159,10 +204,9 @@ \definecolor{gray}{rgb}{0.92,0.92,0.92} \lstset{ % set defaults for languages not otherwise defined - breakatwhitespace=false, % sets if automatic breaks should only happen at whitespace + breakatwhitespace=true, % sets if automatic breaks should only happen at whitespace basicstyle=\ttfamily\footnotesize, breaklines=true, % sets automatic line breaking - escapeinside={|}{|}, % if you want to add LaTeX within your code extendedchars=true, % lets you use non-ASCII characters; for 8-bits % encodings only, does not work with UTF-8 keepspaces=true, % keeps spaces in text, useful for keeping indentation of code @@ -219,6 +263,7 @@ \lstdefinelanguage{OSH+C}[]{C}{ classoffset=1, morekeywords={ + size_t, ptrdiff_t, SHMEM_BCAST_SYNC_SIZE, SHMEM_SYNC_VALUE, start_pes, my_pe, _my_pe, shmem_my_pe, @@ -234,7 +279,7 @@ shmem_short_fadd, shmem_int_fadd, shmem_long_fadd, shmem_set_lock, shmem_test_lock, shmem_clear_lock, shmem_long_sum_to_all, - shmem_complexd_sum_to_all, + shmem_complexd_sum_to_all }, keywordstyle=\color{black}\textbf, classoffset=0, @@ -300,10 +345,40 @@ % End this section is for example code listings % +% +% Deprecation Helpers +% + +\newcommand{\strikeline}[1][red]{{\color{#1}\raisebox{.5ex}{\rule{1em}{.4pt}}}} +\newcommand{\stretchline}[1][red]{\xrfill[.5ex]{.4pt}[#1]} +\newcommand{\DeprecationStart}[1][red]{{\color{#1} deprecation start} \mbox{}} +\newcommand{\DeprecationEnd}[1][red]{{\color{#1} deprecation end} \mbox{}} + +\newcommand{\StartDeprecateBlock}{ + {\strikeline\mbox{} \DeprecationStart \stretchline\mbox{}}} +\newcommand{\EndDeprecateBlock}{% + \mbox{}\stretchline\mbox{} \DeprecationEnd \strikeline} + +\newenvironment{DeprecateBlock}{% + \par \StartDeprecateBlock \par}{\par \EndDeprecateBlock \par} + +\newcommand{\StartInlineDeprecate}{% + \strikeline\mbox{} \DeprecationStart \strikeline \mbox{}} +\newcommand{\EndInlineDeprecate}{% + \strikeline\mbox{} \DeprecationEnd \strikeline} 
+\newenvironment{DeprecateInline}{\StartInlineDeprecate}{\EndInlineDeprecate} + % % Library API description template commands % +\newcommand{\deprecationstart}{\color{red} \raisebox{.5ex}{\rule{1em}{.4pt}} + deprecation start \xrfill[.5ex]{.4pt}[red] \mbox{}} +\newcommand{\deprecationend}{\mbox{}\xrfill[.5ex]{.4pt}[red]\mbox{} \color{red} + deprecation end \raisebox{.5ex}{\rule{1em}{.4pt}}} + +\newenvironment{deprecate}{\deprecationstart \\}{\\ \deprecationend} + \newcommand{\apisummary}[1]{ #1 \hfill @@ -318,39 +393,56 @@ \end{description} } +\lstnewenvironment{Cpp11synopsis} +{ + \textbf{C++11:} + \lstset{language={C++}, backgroundcolor=\color{gray}, lineskip=2pt, + escapechar=@, + morekeywords={size_t, ptrdiff_t, TYPE, noreturn}, + aboveskip=0pt, belowskip=0pt}}{} + \lstnewenvironment{C11synopsis} { \textbf{C11:} - \lstset{language={C++}, backgroundcolor=\color{gray}, lineskip=2pt, - morekeywords={size_t, TYPE}, aboveskip=0pt, belowskip=0pt,}}{} + \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, + escapechar=@, + morekeywords={size_t, ptrdiff_t, TYPE, _Noreturn, shmem_ctx_t}, + aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisCol} { \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, - morekeywords={size_t, TYPE, TYPENAME, SIZE}, aboveskip=0pt, belowskip=0pt}}{} + escapechar=@, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{Csynopsis} { \textbf{C/C++:} \lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, - morekeywords={size_t, TYPE, TYPENAME, SIZE}, aboveskip=0pt, belowskip=0pt}}{} + escapechar=@, + morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + aboveskip=0pt, belowskip=0pt}}{} \lstnewenvironment{CsynopsisST} { \textbf{C/C++:} \color{red} {\lstset{language={C}, backgroundcolor=\color{gray}, lineskip=2pt, - morekeywords={size_t}, aboveskip=0pt, belowskip=0pt}} - } - {} - + escapechar=@, + 
morekeywords={size_t, ptrdiff_t, TYPE, TYPENAME, SIZE, shmem_ctx_t}, + aboveskip=0pt, belowskip=0pt}}}{} + \lstnewenvironment{Fsynopsis} -{ \textbf{FORTRAN:} +{ \deprecationstart \\ + \textbf{FORTRAN:} \lstset{language={Fortran}, backgroundcolor=\color{gray}, lineskip=3pt, + escapechar=@, deletekeywords=[2]{STATUS}, deletekeywords=[3]{LOG}, aboveskip=0pt, - belowskip=0pt}}{} + belowskip=0pt}} +{ \deprecationend } \newenvironment{apiarguments}{ \newcommand{\apiargument}[3]{ @@ -373,7 +465,9 @@ \begin{description} \vspace{-1em} \item[API description] \hfill \\ + \begin{sloppypar} #1 + \end{sloppypar} \hfill } @@ -419,7 +513,8 @@ \newcommand{\apicexample}[3]{ ##1 \lstinputlisting[language={C}, tabsize=2, - basicstyle=\ttfamily\footnotesize, morekeywords={size_t}]{##2} + basicstyle=\ttfamily\footnotesize, + morekeywords={size_t, ptrdiff_t, shmem_ctx_t}]{##2} ##3 } \newcommand{\apifexample}[3]{ ##1 diff --git a/utils/packages.tex b/utils/packages.tex index e7d591490..1b4b6f5b9 100644 --- a/utils/packages.tex +++ b/utils/packages.tex @@ -1,4 +1,5 @@ \usepackage[letterpaper,top=2.5cm,bottom=2.5cm,left=2.5cm,right=2.5cm]{geometry} +\usepackage[T1]{fontenc} \usepackage[utf8]{inputenc} \usepackage{graphicx} \usepackage{multicol} @@ -8,11 +9,13 @@ \usepackage{amsmath} \usepackage[table]{xcolor} \usepackage{xspace} +\usepackage{xhfill} \usepackage{fancyhdr} \usepackage[nolist]{acronym} \usepackage{listings} % note sure after here \usepackage{makeidx} +\usepackage{amsmath} \usepackage[UKenglish]{isodate} \usepackage{ifthen} \usepackage{textcomp} @@ -47,5 +50,6 @@ %\usepackage{draftcopy} %\usepackage{draftwatermark} \usepackage{wrapfig} +\usepackage{longtable} \usepackage{caption} \usepackage{subcaption}