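/* Usage sketch (illustrative only, not part of the library source): how a client might call
 * saf_write_field(), first for the whole field and then for a contiguous hyperslab of its dofs.
 * The handle `field' and the dof buffers below are hypothetical and assumed to come from earlier
 * saf_declare_field() and problem-setup code; H5T_NATIVE_DOUBLE is passed because the datatype was
 * not supplied at declaration time, and NULL for the final argument means the data goes to the same
 * file as the field.
 *
 *     double dofs[100];                        // e.g., one dof per member of a 100-member collection
 *     void *buf = dofs;
 *
 *     // Whole-field write: SAF_WHOLE_FIELD expands to suitable member_count, req_type and
 *     // member_ids arguments for a SAF_TOTALITY request.
 *     saf_write_field(SAF_ALL, &field, SAF_WHOLE_FIELD, 1, H5T_NATIVE_DOUBLE, &buf, NULL);
 *
 *     // Partial write of members 10..19: member_ids gives the start, count and stride (stride
 *     // must currently be 1) of the hyperslab; pbuf holds just those 10 dofs.
 *     double part[10];
 *     void *pbuf = part;
 *     int slab[3] = {10, 10, 1};
 *     saf_write_field(SAF_ALL, &field, 10, SAF_HSLAB, slab, 1, H5T_NATIVE_DOUBLE, &pbuf, NULL);
 */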
 int
 saf_write_field(SAF_ParMode pmode,      /* The parallel mode. */
                 SAF_Field *field,       /* The field to write. */
                 int member_count,       /* A count of the number of members of the collection in which the field's dofs are
                                          * n:1 associated that are actually being written in this call. This value is
                                          * ignored if you are writing the entire field's dofs in this call (i.e., REQ_TYPE is
                                          * SAF_TOTALITY). Also note that as a convenience, we provide the macro
                                          * SAF_WHOLE_FIELD which expands to a comma separated list of appropriate values for
                                          * this argument and the next two, for the case in which the whole field is being
                                          * written in this call. */
                 SAF_RelRep *req_type,   /* The type of I/O request. We use a relation representation type here to specify the
                                          * type of the partial request because it captures the necessary information. Pass
                                          * SAF_HSLAB if you are writing the dofs of a partial hyperslab of the members of the
                                          * associated collection. In this case, MEMBER_IDS points to 3 N-tuples of starts,
                                          * counts and strides of the hyperslab (hypersample) request. Pass SAF_TUPLES, if you
                                          * are writing the dofs for an arbitrary list of members of the associated collection.
                                          * In this case, the MEMBER_IDS points to a list of N-tuples. In both cases, 'N' is
                                          * the number of indexing dimensions in the associated collection. Finally, pass
                                          * SAF_TOTALITY if you are writing the entire field's set of dofs. */
                 int *member_ids,        /* Depending on the value of REQ_TYPE, this argument points to 3 N-tuples storing,
                                          * respectively, the starts, counts and strides *in*each*dimension* of the associated
                                          * collection or to a list of MEMBER_COUNT N-tuples, each one identifying a single
                                          * member of the associated collection or to NULL in the case of a SAF_TOTALITY
                                          * request. */
                 int nbufs,              /* The number of buffers. Valid values are either 1 or a value equal to the number of
                                          * components of the field. A value greater than 1 indicates that the field is stored
                                          * component by component, one buffer for each component. Note, however, that current
                                          * limitations of partial requests support only fields that are interleaved by
                                          * SAF_INTERLEAVE_VECTOR. This, in turn, means that in a partial I/O request, NBUFS
                                          * can only ever be one. */
                 hid_t buf_type,         /* The type of the objects in the buffer(s). If the buffer datatype was provided in
                                          * the saf_declare_field() call that produced the field handle then this parameter
                                         * should have a negative value. If, however, the datatype was not provided in the
                                         * saf_declare_field() call, or if the handle was the result of a find operation, then
                                         * the datatype must be provided in this call. */
                 void **bufs,            /* The buffers. */
                 SAF_Db *file            /* Optional file into which the data is written. If none is supplied then the data is
                                          * written to the same file as the FIELD. */
                 )
 {
     SAF_ENTER(saf_write_field, SAF_PRECONDITION_ERROR);
     double              timer_start=0;
     SAF_FieldTmpl       ftmpl=SS_FIELDTMPL_NULL;
     SAF_Set             base=SS_SET_NULL;
     SAF_Algebraic       algebraic=SS_ALGEBRAIC_NULL;
     int                 buf_size;
     ss_collection_t     coll=SS_COLLECTION_NULL;
     ss_blob_t           dof_blob;       /* The blob that holds the dofs being written. */
     hsize_t             ndofs=1;        /* Total number of dofs to be written. Start at one and multiply it up. */
     hsize_t             my_blob_offset; /* Offset of first item of this task's data in the dof blob. */
     hsize_t             offset;         /* Offset of this task's data in the dof blob for each buffer in turn. */
     SAF_Field           *fields=NULL;   /* One-dimensional array of fields representing all fields from BUFS arrays. */
     hsize_t             my_blob_size;   /* Current size of blob based on what elements tasks are writing (used for creation). */
     hbool_t             should_write;   /* True if this task should call ss_blob_write(). */
     int                 bufno;          /* Counter over the BUFS. */
     ss_scope_t          scope;          /* The scope in which to create the new blob. */
     MPI_Comm            scope_comm;     /* The communicator for `scope' */
     int                 scope_self;     /* MPI task rank of calling task in scope_comm. */
     unsigned            flags;          /* Bit flags for blob operations */
     size_t              cur_ndofs;      /* Number of dofs currently in the indirect_fields array */

     SAF_REQUIRE(_saf_valid_pmode(pmode), SAF_LOW_CHK_COST, SAF_PRECONDITION_ERROR,
                 _saf_errmsg("PMODE must be valid"));
     if (!_saf_is_participating_proc(pmode)) SAF_RETURN(-1);

     SAF_REQUIRE(SS_FIELD(field), SAF_LOW_CHK_COST, SAF_PRECONDITION_ERROR,
                 _saf_errmsg("FIELD must be a valid field handle"));
     SAF_REQUIRE(SAF_XOR(SS_FIELD(field)->m.bufs, bufs), SAF_LOW_CHK_COST, SAF_PRECONDITION_ERROR,
                 _saf_errmsg("BUFS must be specified here or in the saf_declare_field() call (not both)"));
     SAF_REQUIRE(!SAF_XOR(nbufs, bufs), SAF_LOW_CHK_COST, SAF_PRECONDITION_ERROR,
                 _saf_errmsg("pass either valid BUFS and NBUFS>0 or NULL and NBUFS==0"));
     SAF_REQUIRE(_saf_is_valid_io_request(pmode, field, member_count, req_type, member_ids, nbufs),
                 SAF_HIGH_CHK_COST, SAF_PRECONDITION_ERROR,
                 _saf_errmsg("if partial I/O request, collection must be 1D indexed, REQ_TYPE must be SAF_HSLAB "
                             "or a single (e.g. MEMBER_COUNT=1) SAF_TUPLE and field's interleave, if multi-component, "
                             "must be SAF_INTERLEAVE_VECTOR"));
     SAF_REQUIRE((SS_FIELD(field)->m.data_type>0 || buf_type>0), SAF_LOW_CHK_COST, SAF_PRECONDITION_ERROR,
                 _saf_errmsg("buffer datatype must be specified in field declaration or write"));
     SAF_REQUIRE(SS_FIELD(field)->m.data_type<=0 || buf_type<=0 || H5Tequal(SS_FIELD(field)->m.data_type, buf_type),
                 SAF_LOW_CHK_COST, SAF_PRECONDITION_ERROR,
                 _saf_errmsg("buffer datatype must be consistent between field declaration and write"));

     /* Start the timer */
     if (_SAF_GLOBALS.p.TraceTimes)
         timer_start = _saf_wall_clock(FALSE);

    /* Where should a new blob be created if we have to do that? */
     ss_pers_scope(file?(ss_pers_t*)file:(ss_pers_t*)field, &scope);
     ss_scope_comm(&scope, &scope_comm, &scope_self, NULL);

     /* Copy links to local variables for convenience. */
     ftmpl = SS_FIELD(field)->ftmpl;
     base = SS_FIELD(field)->base_space;
     algebraic = SS_FIELDTMPL(&ftmpl)->algebraic;

     /* If data was supplied in the saf_declare_field() call then use that data instead */
     if (SS_FIELD(field)->m.bufs) {
         bufs = SS_FIELD(field)->m.bufs;
         nbufs = SS_FIELD(field)->m.nbufs;
     }
     if (SS_FIELD(field)->m.data_type>0)
         buf_type = SS_FIELD(field)->m.data_type;
     SAF_ASSERT(nbufs==1 || nbufs==SS_FIELDTMPL(&ftmpl)->num_comps, SAF_LOW_CHK_COST, SAF_ASSERTION_ERROR,
                _saf_errmsg("NBUFS %i and ftmpl.num_comps %i not consistent", nbufs, SS_FIELDTMPL(&ftmpl)->num_comps));

     /* Check that all supplied buffer pointers in BUF are non-null */
     SAF_ASSERT_BEGIN(SAF_LOW_CHK_COST) {
         int i;
         ok = TRUE;
         if (bufs && nbufs>1) {
             for (i=0; i<nbufs && ok; i++) {
                 if (!bufs[i]) ok = FALSE;
             }
         }
     } SAF_ASSERT_END(SAF_ASSERTION_ERROR, _saf_errmsg("BUFS must point to NBUFS valid (e.g. non-NULL) pointers"));

    /* The only cases in which the data can be links to other fields are when the storage decomposition is not self or when
     * the algebraic type is indirect (i.e., SAF_FIELD) */
     SAF_ASSERT_BEGIN(SAF_LOW_CHK_COST) {
         if (H5Tequal(buf_type, ss_pers_tm)) {
             ok = (SS_ALGEBRAIC(&algebraic)->indirect || !_saf_is_self_decomp(SS_FIELD_P(field,storage_decomp_cat)));
         } else {
             ok = (!SS_ALGEBRAIC(&algebraic)->indirect && _saf_is_self_decomp(SS_FIELD_P(field,storage_decomp_cat)));
         }
     } SAF_ASSERT_END(SAF_ASSERTION_ERROR,
                      _saf_errmsg("Data can be field links only if the algebraic type is indirect (SAF_FIELD) or the storage "
                                  "decomponsition is not SAF_SELF."));

     /* If we're writing the whole field then ignore what the user passed in for MEMBER_COUNT and instead look at either the
      * field's dof_assoc_cat or, for an indirect field, its storage_decomp_cat. */
     if (SS_RELREP(req_type)->id==SAF_TOTALITY_ID) {
         if (!_saf_is_self_decomp(SS_FIELD_P(field,dof_assoc_cat))) {
             if (_saf_is_self_decomp(SS_FIELD_P(field,storage_decomp_cat))) {
                 _saf_getCollection_set(&base, SS_FIELD_P(field,dof_assoc_cat), &coll);
             } else {
                 _saf_getCollection_set(&base, SS_FIELD_P(field,storage_decomp_cat), &coll);
             }
             SAF_ASSERT(!SS_PERS_ISNULL(&coll), SAF_LOW_CHK_COST, SAF_ASSERTION_ERROR,
                        _saf_errmsg("_saf_getCollection_set failed"));
             member_count = SS_COLLECTION(&coll)->count;
         } else {
             member_count = 1;
         }
     }
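    /* The total number of dofs written is member_count x assoc_ratio x num_comps (the component factor is applied below only
     * for primitive data or self-decomposed indirect data). Illustrative numbers only: one dof per member (assoc_ratio of 1)
     * over a 100-member collection with 3 components gives ndofs==300. */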
     ndofs *= member_count;

     /* Multiply by the field's association ratio */
     ndofs *= SS_FIELD(field)->assoc_ratio;

     /* Now multiply by the field template's number of components. */
     if (_saf_is_primitive_type(buf_type)) {
         assert(SS_FIELDTMPL(&ftmpl)->num_comps>=0);
         ndofs *= SS_FIELDTMPL(&ftmpl)->num_comps;
     } else if (H5Tequal(buf_type, ss_pers_tm)) {
         if (_saf_is_self_decomp(SS_FIELD_P(field,storage_decomp_cat))) {
             assert(SS_FIELDTMPL(&ftmpl)->num_comps>=0);
             ndofs *= SS_FIELDTMPL(&ftmpl)->num_comps;
         }
     } else {
 #ifdef SSLIB_SUPPORT_PENDING
         SAF_ASSERT((ftmplr.num_comps == DSL_rankOf_type(buf_type)), SAF_LOW_CHK_COST, SAF_ASSERTION_ERROR,
                    _saf_errmsg("for a non-primitive datatype, the type must have a rank equal to the number of components "
                                "of the field"));
 #endif /*SSLIB_SUPPORT_PENDING*/
         SAF_ASSERT(nbufs==1, SAF_LOW_CHK_COST, SAF_ASSERTION_ERROR,
                    _saf_errmsg("for a non-primitive datatype, their must be only one buffer"));
         SAF_ASSERT(SS_FIELD(field)->comp_intlv == SAF_INTERLEAVE_VECTOR, SAF_LOW_CHK_COST, SAF_ASSERTION_ERROR,
                    _saf_errmsg("for a non-primitive datatype, the interleave must be SAF_INTERLEAVE_VECTOR"));
     }

     /* Number of dofs per buffer */
     SAF_ASSERT(0==ndofs % nbufs, SAF_LOW_CHK_COST, SAF_ASSERTION_ERROR,
                _saf_errmsg("NBUFS, %d, must evenly divide into size, %d", nbufs, ndofs));
     buf_size = (int)ndofs / nbufs;



    /* If the data is field handles (i.e., the algebraic type is indirect (SAF_FIELD) or the storage decomposition is not
      * SAF_SELF), then store those field handles in a variable length array in the field. We use a variable length array
      * because it's able to handle the conversion from memory to file representation of persistent object links and is well
      * suited for the small array of field links. */
     if (H5Tequal(buf_type, ss_pers_tm)) {
         /* Make sure that we haven't already stored data that isn't field links. */
         SAF_ASSERT(SS_PERS_ISNULL(SS_FIELD_P(field,dof_blob)), SAF_LOW_CHK_COST, SAF_ASSERTION_ERROR,
                    _saf_errmsg("field already has data written in a DOF blob"));

         /* Convert the separate field arrays into a single array */
         if (NULL==(fields=_saf_field_handles_1d(nbufs, (SAF_Field**)bufs, buf_size)))
             SAF_ERROR(-1, _saf_errmsg("unable to convert handles to 1d array"));

         /* Where does the data land in the variable length array? */
         offset = SAF_TOTALITY_ID==SS_RELREP(req_type)->id ? 0 : member_ids[0];

        /* We adjust the offset for the case of a state field because all component handles are packed into a single buffer
         * rather than spread across NBUFS separate buffers. */
         assert(0==offset || SS_FIELDTMPL(&ftmpl)->num_comps>=0);
         offset *= SS_FIELDTMPL(&ftmpl)->num_comps;

         /* ISSUE: Is it possible that a SAF_EACH call will have a different offset and data for each task? If so we'll have to
          *        do some communicating first otherwise ss_file_synchronize() will see that each task made incompatible
          *        modifications to this object. This code just checks that for now. [rpm 2004-06-07] */
         {
             int taskno, ntasks=ss_mpi_comm_size(scope_comm);
            unsigned long *all_offsets = malloc(ntasks*sizeof(*all_offsets));
            if (!all_offsets)
                SAF_ERROR(-1, _saf_errmsg("unable to allocate offset array"));
             all_offsets[scope_self] = offset;
             ss_mpi_allgather(all_offsets, 1, MPI_UNSIGNED_LONG, scope_comm);
             for (taskno=0; taskno<ntasks; taskno++) {
                 if (all_offsets[taskno]!=all_offsets[scope_self]) {
                     SAF_ERROR(-1, _saf_errmsg("offset[task=%d]=%lu; offset[task=%d]=%lu\n",
                                               taskno, all_offsets[taskno], scope_self, all_offsets[scope_self]));
                 }
             }
             SS_FREE(all_offsets);
         }

         /* Insert the field handles into the array and free the buffer, extending the array if necessary */
         SAF_DIRTY(field, pmode);
         cur_ndofs = ss_array_nelmts(SS_FIELD_P(field,indirect_fields));
         if (cur_ndofs<offset+ndofs)
             ss_array_resize(SS_FIELD_P(field,indirect_fields), (size_t)(offset+ndofs));
         ss_array_put(SS_FIELD_P(field,indirect_fields), ss_pers_tm, (size_t)offset, (size_t)ndofs, fields);
         fields = SS_FREE(fields);

         goto done;
     }

     /* We can't get here without passing the valid_io_request pre-condition and all the limitations it currently imposes. So,
      * we know member_ids is either an array of 3 ints {start, count, stride} where stride is constrained to 1 for SAF_HSLAB or
      * an array of 1 int {index} for SAF_TUPLES. Regardless, member_ids[0] is the starting position and member_count is the
      * size of the request. */

     /* Where will each task's contribution land in the blob? We call this `my_blob_offset'. */
     if (SAF_ALL==pmode) {
         if (SAF_TOTALITY_ID==SS_RELREP(req_type)->id) {
             /* Every task is providing all the data. member_ids is probably null. */
             my_blob_size = ndofs;
             my_blob_offset = 0;
             should_write = (0==scope_self);
         } else {
             /* All tasks are providing identical data destined for identical locations in the blob. */
             my_blob_size = member_ids[0] + ndofs;
             my_blob_offset = member_ids[0];
             should_write = (0==scope_self);
         }
     } else {
         if (SAF_TOTALITY_ID==SS_RELREP(req_type)->id) {
             my_blob_size = ndofs;
             my_blob_offset = 0;
             should_write = TRUE;
         } else {
            /* Each task is providing some data (possibly none) at its own base offset. */
             my_blob_size = member_ids[0] + ndofs;
             my_blob_offset = member_ids[0];
             should_write = TRUE;
         }
     }
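    /* Illustrative example (hypothetical numbers): a SAF_HSLAB request whose member_ids are {50,10,1} places this task's
     * data at element offset 50 of the dof blob (my_blob_offset==50) and requires the blob to hold at least 50+ndofs
     * elements (my_blob_size); in SAF_ALL mode only the scope's task 0 performs the actual write. */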

     /* Create or extend the blob(s) and underlying dataset. */
     dof_blob = SS_FIELD(field)->dof_blob;
     if (SS_PERS_ISNULL(&dof_blob)) {
         /* Create the blobs if they don't exist yet. */
         if (NULL==ss_blob_new(&scope, SAF_ALL==pmode?SS_ALLSAME:0U, &dof_blob))
             SAF_ERROR(SAF_FILE_ERROR, _saf_errmsg("cannot create field dof blob"));

         /* Temporarily bind some memory just so we can create the dataset */
         if (ss_blob_bind_m1(&dof_blob, (void*)1, buf_type, my_blob_size)<0)
             SAF_ERROR(SAF_FILE_ERROR, _saf_errmsg("cannot bind memory to field dof blob"));
         flags = (SAF_ALL==pmode?SS_ALLSAME:SS_BLOB_EACH) |
                 (SS_SET(&base)->is_extendible?SS_BLOB_EXTEND:0U);
         if (ss_blob_mkstorage(&dof_blob, NULL, flags, NULL)<0)
             SAF_ERROR(SAF_FILE_ERROR, _saf_errmsg("cannot create field dof blob dataset"));
         if (ss_blob_bind_m1(&dof_blob, NULL, -1, (hsize_t)0)<0)
             SAF_ERROR(SAF_FILE_ERROR, _saf_errmsg("cannot unbind memory from field dof blob"));

         /* Save the new blob pointer in the field */
         SAF_DIRTY(field,pmode);
         SS_FIELD(field)->dof_blob = dof_blob;
     } else if (SS_SET(SS_FIELD_P(field,base_space))->is_extendible) {
         /* The ss_blob_write1() below is independent so we need to extend the dataset here if necessary */
         if (SAF_ALL!=pmode)
             SAF_ERROR(SAF_NOTIMPL_ERROR, _saf_errmsg("extending in SAF_EACH mode is not implemented yet"));
         ss_blob_extend1(&dof_blob, my_blob_size, SS_ALLSAME, NULL);
     }

     /* Write all buffers to the blob. Since each task may have a different number of buffers we have to use independent I/O */
     if (should_write) {
         for (bufno=0; bufno<nbufs; bufno++) {
             ss_blob_bind_m1(&dof_blob, bufs[bufno], buf_type, (hsize_t)buf_size);
             offset = my_blob_offset + bufno*buf_size;
             ss_blob_write1(&dof_blob, offset, (hsize_t)buf_size, SS_BLOB_UNBIND, NULL);
         }
     }

 done:
     SS_FIELD(field)->m.bufs = NULL;
     SS_FIELD(field)->m.nbufs = 0;
     SS_FIELD(field)->m.buf_size = 0;
 #if 0 /* Do not clear this one: the declared datatype should stick around */
     SS_FIELD(field)->m.data_type = 0;
 #endif

     /* Time accounting */
     if (_SAF_GLOBALS.p.TraceTimes)
        _SAF_GLOBALS.CummWriteTime += (_saf_wall_clock(FALSE) - timer_start);

     SAF_LEAVE(SAF_SUCCESS);
 }