Line data Source code
1 : // Copyright (C) 2015 Technische Universitaet Muenchen
2 : // This file is part of the MaMiCo project. For conditions of distribution
3 : // and use, please see the copyright notice in MaMiCo's main folder
4 :
5 8 : template <class Cell_T, unsigned int dim> coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::SendReceiveBuffer() {
6 8 : deleteBuffers();
7 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
8 8 : _requestsAllocated = false;
9 8 : _requests = NULL;
10 :
11 8 : int err = MPI_Op_create(&elementWiseSum, true, &elementWiseSumOperation);
12 8 : if (err != MPI_SUCCESS) {
13 0 : char* str = new char[MPI_MAX_ERROR_STRING];
14 : int len;
15 0 : MPI_Error_string(err, str, &len);
16 0 : std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::SendReceiveBuffer(): MPI_Op_create failed with error: " << str << std::endl;
17 0 : delete[] str;
18 0 : exit(EXIT_FAILURE);
19 : }
20 : #endif
21 8 : }
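// Editorial sketch (illustration only; the class's actual elementWiseSum may differ): a user-defined
// reduction passed to MPI_Op_create has to match the MPI_User_function signature and combine "in"
// into "inout" element-wise, e.g. for the plain double buffers handled by this class:
//
//   void exampleElementWiseSum(void* in, void* inout, int* len, MPI_Datatype* /*datatype*/) {
//     double* input = static_cast<double*>(in);
//     double* output = static_cast<double*>(inout);
//     for (int i = 0; i < *len; i++)
//       output[i] += input[i]; // element-wise accumulation, including the leading cell-index slot
//   }
//
// It is registered above with commute=true, which is valid because element-wise addition is commutative.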
22 :
23 0 : template <class Cell_T, unsigned int dim> coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::~SendReceiveBuffer() {
24 0 : deleteBuffers();
25 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
26 0 : if (_requests != NULL) {
27 0 : delete[] _requests;
28 0 : _requests = NULL;
29 : }
30 0 : for (MPI_Comm& subcomm : _subComms) {
31 0 : if (subcomm != MPI_COMM_NULL) {
32 0 : MPI_Comm_free(&subcomm);
33 0 : subcomm = MPI_COMM_NULL;
34 : }
35 : }
36 0 : for (MPI_Group& subgroup : _subGroups) {
37 0 : if (subgroup != MPI_GROUP_NULL) {
38 0 : MPI_Group_free(&subgroup);
39 0 : subgroup = MPI_GROUP_NULL;
40 : }
41 : }
42 0 : _subComms.clear();
43 0 : _subGroups.clear();
44 :
45 0 : MPI_Op_free(&elementWiseSumOperation);
46 : #endif
47 0 : }
48 :
49 16 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::deleteBuffer(std::map<unsigned int, BufferWithID>& buffer) {
50 16 : for (typename std::map<unsigned int, BufferWithID>::iterator it = buffer.begin(); it != buffer.end(); it++) {
51 0 : if ((it->second).buffer != NULL) {
52 0 : free((it->second).buffer);
53 0 : (it->second).buffer = NULL;
54 : }
55 0 : (it->second).bufferSize = 0;
56 : }
57 16 : buffer.clear();
58 16 : }
59 :
60 8 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::deleteBuffers() {
61 8 : deleteBuffer(_sendBuffer);
62 8 : deleteBuffer(_receiveBuffer);
63 8 : _bcastBuffer.clear();
64 8 : _reduceBuffer.clear();
65 8 : }
66 :
67 : #pragma region collective operations
68 :
69 : template <class Cell_T, unsigned int dim>
70 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::writeToBcastBuffer(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange,
71 : const Cell_T& couplingCell, I01 idx) {
72 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
73 :
74 : unsigned int intNumberProcesses = IDXS.getScalarNumberProcesses();
75 : // determine the ranks which need to receive information of this coupling cell
76 : const std::vector<unsigned int> ranks = dataExchange.getTargetRanks(idx);
77 :
78 : if (ranks.empty()) {
79 : return;
80 : }
81 :
82 : // determine unique subdomains which need to receive this information
83 : // and associate them with targetRanks
84 : std::map<unsigned int, std::set<unsigned int>> subdomainMap;
85 : for (const unsigned int& rank : ranks) {
86 :
87 : unsigned int subdomain = rank % intNumberProcesses;
88 : unsigned int key = IDXS.getRank() * intNumberProcesses + subdomain;
89 :
90 : subdomainMap[key].insert(rank);
91 : }
92 :
93 : const unsigned int globalIndex = I00{idx}.get();
94 :
95 : // collect data for subdomains
96 : for (std::pair<const unsigned int, std::set<unsigned int>>& subdomain : subdomainMap) {
97 : unsigned int key = subdomain.first;
98 :
99 : // create new buffer if non-existent and insert
100 : // information about ranks
101 : _bcastBuffer[key].rootRank = IDXS.getRank();
102 : _bcastBuffer[key].nonRootRanks.insert(subdomain.second.begin(), subdomain.second.end());
103 :
104 : if (_bcastBuffer[key].cellIndices.find(globalIndex) != _bcastBuffer[key].cellIndices.end()) {
105 : // buffer already contains data of cell, nothing to do!
106 : continue;
107 : }
108 : _bcastBuffer[key].cellIndices.insert(globalIndex);
109 :
110 : // resize buffer for additional data
111 : const unsigned int firstPos = _bcastBuffer[key].buffer.size();
112 : _bcastBuffer[key].buffer.resize(_bcastBuffer[key].buffer.size() + doublesPerCell);
113 :
114 :       // write the global cell index, then call the specialised function to read from the coupling cell into the broadcast buffer
115 : _bcastBuffer[key].buffer[firstPos] = (double)globalIndex;
116 :
117 : dataExchange.readFromCell(&_bcastBuffer[key].buffer.data()[firstPos + 1], couplingCell);
118 : }
119 : }
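// Editorial sketch of the buffer-key encoding used above (and mirrored in the reduce path): each
// collective buffer is addressed by packing (root rank, subdomain id) into a single unsigned int,
//
//   key = rootRank * intNumberProcesses + subdomain
//
// so that, given intNumberProcesses, both components can be recovered again:
//
//   unsigned int rootRank  = key / intNumberProcesses;
//   unsigned int subdomain = key % intNumberProcesses;
//
// The same key is later reused as the tag argument of MPI_Comm_create_group() in
// allocateBcastRequests() and allocateReduceRequests().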
120 :
121 : template <class Cell_T, unsigned int dim>
122 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::readFromCollectiveBuffer(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange,
123 : Cell_T& couplingCell, I01 idx) {
124 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
125 : const unsigned int globalIndex = I00{idx}.get();
126 :
127 : // loop over all received buffers
128 : for (typename std::map<unsigned int, BufferCollective>::const_iterator it = _bcastBuffer.begin(); it != _bcastBuffer.end(); it++) {
129 :
130 : // loop over all received cells of this buffer
131 : const unsigned int bufferSize = it->second.buffer.size();
132 : for (unsigned int i = 0; i < bufferSize; i += doublesPerCell) {
133 :
134 : // determine global cell index for this coupling cell
135 : const auto thisIndex = (unsigned int)it->second.buffer[i];
136 :
137 : // if this is the correct cell, write data into coupling cell and return
138 : if (thisIndex == globalIndex) {
139 : // read receive buffer and write to coupling cell (use specialised function)
140 : dataExchange.writeToCell(&(it->second.buffer.data()[i + 1]), couplingCell);
141 : return;
142 : }
143 : } // loop over buffer
144 : } // loop over all buffers
145 : }
146 :
147 : template <class Cell_T, unsigned int dim>
148 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::allocateBcastBufferForReceiving(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange, I01 idx) {
149 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
150 :
151 : // determine the ranks from which we need to receive information of this coupling cell
152 : const std::vector<unsigned int> sourceRanks = dataExchange.getSourceRanks(idx);
153 : const std::vector<unsigned int> targetRanks = dataExchange.getTargetRanks(idx);
154 : if (sourceRanks.empty()) {
155 : return;
156 : }
157 : #if (COUPLING_MD_DEBUG == COUPLING_MD_YES)
158 :   const std::vector<unsigned int>& ranks = sourceRanks;
159 :   const unsigned int ranksSize = (unsigned int)ranks.size();
160 :   std::cout << "Rank " << IDXS.getRank() << ": allocate bcast buffers for cell " << idx << ";";
161 : std::cout << " associated source ranks: ";
162 : for (unsigned int i = 0; i < ranksSize; i++) {
163 : std::cout << ranks[i] << " ";
164 : }
165 : std::cout << std::endl;
166 : #endif
167 :
168 : unsigned int intNumberProcesses = IDXS.getScalarNumberProcesses();
169 :
170 : const unsigned int globalIndex = I00{idx}.get();
171 :
172 : unsigned int thisSubdomain = IDXS.getRank() % intNumberProcesses;
173 :
174 : std::set<unsigned int> subdomainRanks;
175 :
176 : // Determine all receiving ranks of this subdomain
177 : for (const unsigned int& rank : targetRanks) {
178 : if (rank % intNumberProcesses == thisSubdomain) {
179 : subdomainRanks.insert(rank);
180 : }
181 : }
182 : if (subdomainRanks.empty()) {
183 : return;
184 : }
185 :
186 : // loop over source ranks
187 : for (const unsigned int& sourceRank : sourceRanks) {
188 : if (sourceRank == IDXS.getRank()) {
189 : continue;
190 : }
191 : unsigned int key = sourceRank * intNumberProcesses + thisSubdomain;
192 :
193 : _bcastBuffer[key].rootRank = sourceRank;
194 : _bcastBuffer[key].nonRootRanks.insert(subdomainRanks.begin(), subdomainRanks.end());
195 :
196 : if (_bcastBuffer[key].cellIndices.find(globalIndex) != _bcastBuffer[key].cellIndices.end()) {
197 : // space for this cell already allocated, nothing to do!
198 : continue;
199 : }
200 : _bcastBuffer[key].cellIndices.insert(globalIndex);
201 :
202 : // increment buffer size and resize buffer
203 : const unsigned int firstPos = _bcastBuffer[key].buffer.size();
204 : _bcastBuffer[key].buffer.resize(_bcastBuffer[key].buffer.size() + doublesPerCell);
205 : // set all values to -1.0
206 : for (unsigned int j = firstPos; j < firstPos + doublesPerCell; j++) {
207 : _bcastBuffer[key].buffer[j] = -1.0;
208 : }
209 : } // rank
210 : }
211 :
212 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::triggerBcasts(const unsigned int thisRank) {
213 :
214 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
215 : int counter = 0;
216 : for (std::pair<const unsigned int, BufferCollective>& buffer : _bcastBuffer) {
217 : int err = MPI_Ibcast(buffer.second.buffer.data(), static_cast<int>(buffer.second.buffer.size()), MPI_DOUBLE, 0, _subComms[counter], &_requests[counter]);
218 : if (err != MPI_SUCCESS) {
219 : char* str = new char[MPI_MAX_ERROR_STRING];
220 : int len = -1;
221 : MPI_Error_string(err, str, &len);
222 : std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::triggerBcasts(): Bcasting from rank " << thisRank << " on subcomm " << counter
223 : << " failed with error: " << str << std::endl;
224 : delete[] str;
225 : exit(EXIT_FAILURE);
226 : }
227 : counter++;
228 : }
229 : #endif
230 : }
231 :
232 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::triggerReduce(const unsigned int thisRank) {
233 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
234 : unsigned int counter = 0;
235 : for (std::pair<const unsigned int, BufferCollective>& buffer : _reduceBuffer) {
236 :
237 : if (buffer.second.nonRootRanks.empty() || (buffer.second.nonRootRanks.size() == 1 && *buffer.second.nonRootRanks.begin() == buffer.second.rootRank)) {
238 : continue;
239 : }
240 : int rankInSubComm;
241 : MPI_Comm_rank(_subComms[counter], &rankInSubComm);
242 :
243 : double *sendBuffer, *recvBuffer;
244 :
245 : if (rankInSubComm == 0) {
246 : sendBuffer = static_cast<double*>(MPI_IN_PLACE);
247 : recvBuffer = buffer.second.buffer.data();
248 : } else {
249 : sendBuffer = buffer.second.buffer.data();
250 : recvBuffer = nullptr;
251 : }
252 :
253 : int err = MPI_Ireduce(sendBuffer, recvBuffer, static_cast<int>(buffer.second.buffer.size()), MPI_DOUBLE, elementWiseSumOperation, 0, _subComms[counter],
254 : &_requests[counter]);
255 : if (err != MPI_SUCCESS) {
256 : char* str = new char[MPI_MAX_ERROR_STRING];
257 : int len;
258 : MPI_Error_string(err, str, &len);
259 : std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::triggerReduce(): Receiving Bcast on rank " << thisRank << " from " << buffer.first
260 : << " failed with error: " << str << std::endl;
261 : delete[] str;
262 : exit(EXIT_FAILURE);
263 : }
264 : counter += 1;
265 : }
266 : #endif
267 : }
268 :
269 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::waitAllCollectiveOperations() {
270 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
271 : // if no requests have been allocated, return immediately
272 : if (!_requestsAllocated && (_requests == nullptr)) {
273 : return;
274 : }
275 : if (_requests == nullptr) {
276 : std::cout << "ERROR coupling::SendReceiveBuffer::waitAllCollectiveOperations(): _requests==NULL!" << std::endl;
277 : exit(EXIT_FAILURE);
278 : }
279 : int err = MPI_Waitall(_bcastOrReduceSize, _requests, MPI_STATUSES_IGNORE);
280 : if (err != MPI_SUCCESS) {
281 : char* str = new char[MPI_MAX_ERROR_STRING];
282 : int len = -1;
283 : MPI_Error_string(err, str, &len);
284 : delete[] str;
285 : exit(EXIT_FAILURE);
286 : }
287 : delete[] _requests;
288 : /*
289 : for (MPI_Comm& subcomm : _subComms) {
290 : if (subcomm != MPI_COMM_NULL) {
291 : MPI_Comm_free(&subcomm);
292 : subcomm = MPI_COMM_NULL;
293 : }
294 : }
295 : for (MPI_Group& subgroup : _subGroups) {
296 : if (subgroup != MPI_GROUP_NULL) {
297 : MPI_Group_free(&subgroup);
298 : subgroup = MPI_GROUP_NULL;
299 : }
300 : }
301 : */
302 : _requests = nullptr;
303 : _requestsAllocated = false;
304 : #endif
305 : }
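// Editorial note: a plausible driving sequence for the broadcast path, assuming the caller (typically
// a derived sendrecv class) performs the steps in this order; the actual orchestration may differ:
//
//   writeToBcastBuffer(...) / allocateBcastBufferForReceiving(...);  // pack on the root, size buffers on receivers
//   allocateBcastRequests(rank);    // create sub-communicators and one MPI_Request per buffer
//   triggerBcasts(rank);            // post MPI_Ibcast on every sub-communicator
//   waitAllCollectiveOperations();  // MPI_Waitall over all outstanding collective requests
//   readFromCollectiveBuffer(...);  // scatter received values back into the coupling cells
//
// The reduce direction is analogous, using writeToReduceBuffer()/allocateReduceBufferForReceiving(),
// allocateReduceRequests(), triggerReduce() and readFromReduceBuffer().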
306 :
307 : template <class Cell_T, unsigned int dim>
308 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::allocateReduceBufferForReceiving(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange,
309 : I01 idx) {
310 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
311 :
312 : // determine the ranks from which we need to receive information of this coupling cell
313 : const std::vector<unsigned int> sourceRanks = dataExchange.getSourceRanks(idx);
314 : const std::vector<unsigned int> targetRanks = dataExchange.getTargetRanks(idx);
315 : if (std::find(targetRanks.begin(), targetRanks.end(), IDXS.getRank()) == targetRanks.end()) {
316 : // Nothing to do for this rank and subdomain!
317 : return;
318 : }
319 : if (sourceRanks.empty()) {
320 : return;
321 : }
322 : #if (COUPLING_MD_DEBUG == COUPLING_MD_YES)
323 : const unsigned int ranksSize = (unsigned int)sourceRanks.size();
324 : std::cout << "Rank " << IDXS.getRank() << ": allocate reduce buffers for cell " << idx << ";";
325 : std::cout << " associated source ranks: ";
326 : for (unsigned int i = 0; i < ranksSize; i++) {
327 : std::cout << sourceRanks[i] << " ";
328 : }
329 : std::cout << std::endl;
330 : #endif
331 :
332 : unsigned int intNumberProcesses = IDXS.getScalarNumberProcesses();
333 :
334 : const unsigned int globalIndex = I00{idx}.get();
335 :
336 : // unsigned int thisSubdomain = IDXS.getRank() % intNumberProcesses;
337 :
338 : std::map<unsigned int, std::set<unsigned int>> subdomainMap;
339 :
340 : // Determine all sending ranks of this subdomain
341 : for (const unsigned int& rank : sourceRanks) {
342 : const unsigned int subdomain = rank % intNumberProcesses;
343 : const unsigned int key = IDXS.getRank() * intNumberProcesses + subdomain;
344 : subdomainMap[key].insert(rank);
345 : }
346 :
347 : // loop over source ranks
348 :   // loop over the subdomains contributing to the reduction
349 : // if(sourceRank == IDXS.getRank()) { continue; }
350 : unsigned int key = subdomainEntry.first;
351 :
352 : _reduceBuffer[key].rootRank = IDXS.getRank();
353 : _reduceBuffer[key].nonRootRanks.insert(subdomainEntry.second.begin(), subdomainEntry.second.end());
354 :
355 : if (_reduceBuffer[key].cellIndices.find(globalIndex) != _reduceBuffer[key].cellIndices.end()) {
356 : // space for this cell already allocated, nothing to do!
357 : continue;
358 : }
359 : _reduceBuffer[key].cellIndices.insert(globalIndex);
360 :
361 : // increment buffer size and resize buffer
362 : const unsigned int firstPos = _reduceBuffer[key].buffer.size();
363 : _reduceBuffer[key].buffer.resize(_reduceBuffer[key].buffer.size() + doublesPerCell);
364 : // set all values to 0.0
365 : for (unsigned int j = firstPos; j < firstPos + doublesPerCell; j++) {
366 : _reduceBuffer[key].buffer[j] = 0.0;
367 : }
368 :   } // subdomain
369 : }
370 :
371 : template <class Cell_T, unsigned int dim>
372 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::writeToReduceBuffer(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange, const Cell_T& cell,
373 : I01 idx) {
374 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
375 :
376 : const unsigned int globalIndex = I00{idx}.get();
377 :
378 : unsigned int intNumberProcesses = IDXS.getScalarNumberProcesses();
379 : // determine the ranks which need to receive information of this coupling cell
380 : const std::vector<unsigned int> targetRanks = dataExchange.getTargetRanks(idx);
381 : const std::vector<unsigned int> sourceRanks = dataExchange.getSourceRanks(idx);
382 :
383 : if (targetRanks.empty()) {
384 : return;
385 : }
386 :
387 : // subdomain for this process
388 : unsigned int subdomainID = IDXS.getRank() % intNumberProcesses;
389 :
390 : // determine unique subdomains which need to send this information
391 : // and associate them with nonRootRanks
392 : // note: this rank may also be part of nonRootRanks
393 : std::map<unsigned int, std::set<unsigned int>> subdomainMap;
394 : for (const unsigned int& targetRank : targetRanks) {
395 :
396 : unsigned int key = targetRank * intNumberProcesses + subdomainID;
397 : // subdomainMap[key].insert(IDXS.getRank());
398 : for (const unsigned int& sourceRank : sourceRanks) {
399 : if (sourceRank % intNumberProcesses == subdomainID) {
400 : subdomainMap[key].insert(sourceRank);
401 : }
402 : }
403 : }
404 :
405 : // collect data for subdomains
406 : for (std::pair<const unsigned int, std::set<unsigned int>>& subdomain : subdomainMap) {
407 : unsigned int key = subdomain.first;
408 :
409 : // create new buffer if non-existent and insert
410 : // information about ranks
411 : _reduceBuffer[key].rootRank = (key - subdomainID) / intNumberProcesses;
412 : _reduceBuffer[key].nonRootRanks.insert(subdomain.second.begin(), subdomain.second.end());
413 :
414 : if (_reduceBuffer[key].cellIndices.find(globalIndex) != _reduceBuffer[key].cellIndices.end()) {
415 : // buffer already contains data of cell, nothing to do!
416 : continue;
417 : }
418 : _reduceBuffer[key].cellIndices.insert(globalIndex);
419 :
420 : // resize buffer for additional data
421 : const unsigned int firstPos = _reduceBuffer[key].buffer.size();
422 : _reduceBuffer[key].buffer.resize(_reduceBuffer[key].buffer.size() + doublesPerCell);
423 :
424 :     // write the global cell index, then call the specialised function to read from the coupling cell into the reduce buffer
425 : _reduceBuffer[key].buffer[firstPos] = (double)globalIndex;
426 :
427 : dataExchange.readFromCell(&_reduceBuffer[key].buffer.data()[firstPos + 1], cell);
428 : }
429 : }
430 :
431 : template <class Cell_T, unsigned int dim>
432 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::readFromReduceBuffer(coupling::sendrecv::DataExchangeFromMD2Macro<dim>& dataExchange,
433 : Cell_T& couplingCell, const I01 idx) {
434 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
435 : const unsigned int globalIndex = I00{idx}.get();
436 :
437 : // loop over all received buffers
438 : for (typename std::map<unsigned int, BufferCollective>::const_iterator it = _reduceBuffer.begin(); it != _reduceBuffer.end(); it++) {
439 :
440 : // loop over all received cells of this buffer
441 : const unsigned int bufferSize = it->second.buffer.size();
442 : for (unsigned int i = 0; i < bufferSize; i += doublesPerCell) {
443 :
444 : // determine global cell index for this coupling cell.
445 :       // Due to the reduction, the stored cell index has been summed over all contributing (non-root) ranks,
446 :       // i.e. it holds cellIndex * nonRootRanks.size(). We thus have to convert this value back to its original.
447 : const auto thisIndex = (unsigned int)it->second.buffer[i] / it->second.nonRootRanks.size();
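      // Worked example with assumed numbers: if 4 non-root ranks each contributed global index 57,
      // the reduced slot holds 4 * 57 = 228.0, and the integer division 228 / 4 == 57 recovers the index.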
448 :
449 : // if this is the correct cell, write data into coupling cell and return
450 : if (thisIndex == globalIndex) {
451 : // read receive buffer and write to coupling cell (use specialised function)
452 : dataExchange.writeToCell(&(it->second.buffer.data()[i + 1]), couplingCell);
453 : return;
454 : }
455 : } // loop over buffer
456 : } // loop over all buffers
457 : }
458 :
459 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::allocateBcastRequests(const unsigned int thisRank) {
460 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
461 :
462 : if (_requestsAllocated) {
463 : std::cout << "ERROR coupling::SendReceiveBuffer::allocateBcastRequests(): allocateRequests() called although allocation already took place!" << std::endl;
464 : exit(EXIT_FAILURE);
465 : }
466 : if (_requests != nullptr) {
467 : delete[] _requests;
468 : _requests = nullptr;
469 : }
470 :
471 : // determine number of MPI requests
472 : _bcastOrReduceSize = _bcastBuffer.size();
473 :
474 :   // no-op, i.e. no request allocation, if there are no MPI transfers
475 : if (_bcastOrReduceSize == 0) {
476 : return;
477 : }
478 :
479 : _requests = new MPI_Request[_bcastOrReduceSize];
480 : if (_requests == nullptr) {
481 : std::cout << "ERROR coupling::SendReceiveBuffer::allocateRequests(): _requests==NULL!" << std::endl;
482 : exit(EXIT_FAILURE);
483 : }
484 :
485 : // TODO better method for checking whether groups changed
486 : if (static_cast<unsigned int>(_bcastOrReduceSize) == _subComms.size()) {
487 : return;
488 : }
489 :
490 : _subComms.resize(_bcastOrReduceSize);
491 : _subGroups.resize(_bcastOrReduceSize);
492 :
493 : MPI_Group group;
494 : MPI_Comm_group(MPI_COMM_WORLD, &group);
495 :
496 : // groups for receiving broadcasts
497 : unsigned int counter = 0;
498 : for (std::pair<const unsigned int, BufferCollective>& buffer : _bcastBuffer) {
499 : auto duplicate = buffer.second.nonRootRanks.find(buffer.second.rootRank);
500 : if (duplicate != buffer.second.nonRootRanks.end()) {
501 : buffer.second.nonRootRanks.erase(duplicate);
502 : }
503 :
504 : std::vector<int> groupRanks(buffer.second.nonRootRanks.size() + 1);
505 : std::copy(buffer.second.nonRootRanks.begin(), buffer.second.nonRootRanks.end(), std::next(groupRanks.begin()));
506 : groupRanks[0] = buffer.second.rootRank;
507 :
508 : MPI_Group_incl(group, static_cast<int>(groupRanks.size()), groupRanks.data(), &_subGroups[counter]);
509 : int err = MPI_Comm_create_group(MPI_COMM_WORLD, _subGroups[counter], static_cast<int>(buffer.first), &_subComms[counter]);
510 : if (err != MPI_SUCCESS) {
511 : char* str = new char[MPI_MAX_ERROR_STRING];
512 : int len = -1;
513 : MPI_Error_string(err, str, &len);
514 : std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::allocateBcastRequests(): Unable to create communicator "
515 : << "on rank " << thisRank << " together with ranks [ ";
516 : for (int& item : groupRanks) {
517 : std::cout << item << " ";
518 : }
519 :       std::cout << " ], error: " << str << std::endl;
520 : delete[] str;
521 : exit(EXIT_FAILURE);
522 : }
523 :
524 : // std::cout << thisRank << " : " << " allocated request for group " << groupRanks << " with tag " << buffer.first << std::endl;
525 :
526 : counter += 1;
527 : }
528 :
529 : _requestsAllocated = true;
530 : #endif
531 : }
532 :
533 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::allocateReduceRequests(const unsigned int thisRank) {
534 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
535 :
536 : if (_requestsAllocated) {
537 : std::cout << "ERROR coupling::SendReceiveBuffer::allocateReduceRequests(): allocateRequests() called although allocation already took place!" << std::endl;
538 : exit(EXIT_FAILURE);
539 : }
540 : if (_requests != nullptr) {
541 : delete[] _requests;
542 : _requests = nullptr;
543 : }
544 :
545 : if (_reduceBuffer.empty()) {
546 : return;
547 : }
548 : // determine number of MPI requests
549 : _bcastOrReduceSize = 0;
550 : for (std::pair<const unsigned int, BufferCollective>& buffer : _reduceBuffer) {
551 : const std::set<unsigned int>& nonRootRanks = buffer.second.nonRootRanks;
552 : if (!nonRootRanks.empty() && (nonRootRanks.size() > 1 || nonRootRanks.find(buffer.second.rootRank) == nonRootRanks.end())) {
553 : _bcastOrReduceSize += 1;
554 : }
555 : }
556 :
557 :   // no-op, i.e. no request allocation, if there are no MPI transfers
558 : if (_bcastOrReduceSize == 0) {
559 : return;
560 : }
561 :
562 : _requests = new MPI_Request[_bcastOrReduceSize];
563 : if (_requests == nullptr) {
564 : std::cout << "ERROR coupling::SendReceiveBuffer::allocateRequests(): _requests==NULL!" << std::endl;
565 : exit(EXIT_FAILURE);
566 : }
567 :
568 : // TODO better method for checking whether groups changed
569 : if (static_cast<unsigned int>(_bcastOrReduceSize) == _subComms.size()) {
570 : return;
571 : }
572 :
573 : _subComms.resize(_bcastOrReduceSize);
574 : _subGroups.resize(_bcastOrReduceSize);
575 :
576 : MPI_Group group;
577 : MPI_Comm_group(MPI_COMM_WORLD, &group);
578 :
579 :   // groups for the reduce operations
580 : unsigned int counter = 0;
581 : for (std::pair<const unsigned int, BufferCollective>& buffer : _reduceBuffer) {
582 : if (buffer.second.nonRootRanks.empty() || (buffer.second.nonRootRanks.size() == 1 && *buffer.second.nonRootRanks.begin() == buffer.second.rootRank)) {
583 : continue;
584 : }
585 :
586 : // Sort all ranks into a vector, deleting possible duplicate of root rank
587 : std::vector<int> groupRanks(buffer.second.nonRootRanks.size() + 1);
588 : std::copy(buffer.second.nonRootRanks.begin(), buffer.second.nonRootRanks.end(), std::next(groupRanks.begin()));
589 : groupRanks[0] = buffer.second.rootRank;
590 : auto duplicate = std::find(std::next(groupRanks.begin()), groupRanks.end(), groupRanks[0]);
591 : if (duplicate != groupRanks.end()) {
592 : groupRanks.erase(duplicate);
593 : }
594 :
595 : // Create sub communicator of ranks
596 : MPI_Group_incl(group, static_cast<int>(groupRanks.size()), groupRanks.data(), &_subGroups[counter]);
597 : int err = MPI_Comm_create_group(MPI_COMM_WORLD, _subGroups[counter], static_cast<int>(buffer.first), &_subComms[counter]);
598 : if (err != MPI_SUCCESS) {
599 : char* str = new char[MPI_MAX_ERROR_STRING];
600 : int len = -1;
601 : MPI_Error_string(err, str, &len);
602 :       std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::allocateReduceRequests(): Unable to create communicator "
603 : << "on rank " << thisRank << " together with ranks [ ";
604 : for (int& item : groupRanks) {
605 : std::cout << item << " ";
606 : }
607 :       std::cout << " ], error: " << str << std::endl;
608 : delete[] str;
609 : exit(EXIT_FAILURE);
610 : }
611 :
612 : std::cout << thisRank << " : "
613 : << " allocated request for group " << groupRanks << " with tag " << buffer.first << std::endl;
614 :
615 : counter += 1;
616 : }
617 :
618 : _requestsAllocated = true;
619 : #endif
620 : }
621 :
622 : #pragma endregion // collective operations
623 :
624 : #pragma region sequential operations
625 :
626 : template <class Cell_T, unsigned int dim>
627 : template <class Container_T>
628 0 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::writeToSendBuffer(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange,
629 : const Container_T& cells) {
630 0 : I01 idx;
631 : Cell_T* couplingCell;
632 0 : for (auto pair : cells) {
633 0 : std::tie(couplingCell, idx) = pair;
634 0 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
635 0 : const unsigned int globalIndex = I00{idx}.get();
636 :
637 : // determine the ranks which need to receive information of this coupling
638 : // cell
639 0 : const std::vector<unsigned int> ranks = dataExchange.getTargetRanks(idx);
640 0 : const unsigned int ranksSize = (unsigned int)ranks.size();
641 : #if (COUPLING_MD_DEBUG == COUPLING_MD_YES)
642 : std::cout << "Rank " << IDXS.getRank() << ": write cell " << idx << " to send buffer;";
643 : std::cout << " associated target ranks: ";
644 : for (unsigned int i = 0; i < ranksSize; i++) {
645 : std::cout << ranks[i] << " ";
646 : }
647 : std::cout << std::endl;
648 : #endif
649 :
650 : // loop over ranks
651 0 : for (unsigned int i = 0; i < ranksSize; i++) {
652 :
653 : // get iterator to map
654 0 : typename std::map<unsigned int, BufferWithID>::iterator thisBuffer = _sendBuffer.find(ranks[i]);
655 0 : if (thisBuffer == _sendBuffer.end()) {
656 0 : _sendBuffer[ranks[i]] = BufferWithID();
657 0 : thisBuffer = _sendBuffer.find(ranks[i]);
658 : }
659 :
660 : // increment buffer size and realloc buffer
661 0 : const unsigned int firstPos = (thisBuffer->second).bufferSize * doublesPerCell;
662 0 : (thisBuffer->second).bufferSize++;
663 0 : (thisBuffer->second).buffer = (double*)realloc((thisBuffer->second).buffer, sizeof(double) * (thisBuffer->second).bufferSize * doublesPerCell);
664 0 : if ((thisBuffer->second).buffer == NULL) {
665 0 : std::cout << "ERROR coupling::SendReceiveBuffer::writeToSendBuffer(): realloc yields NULL ptr!" << std::endl;
666 0 : exit(EXIT_FAILURE);
667 : }
668 :
669 :       // write the global cell index, then call the specialised function to read from the coupling cell into the send buffer
670 0 : (thisBuffer->second).buffer[firstPos] = (double)globalIndex;
671 :
672 0 : dataExchange.readFromCell(&((thisBuffer->second).buffer[firstPos + 1]), *couplingCell);
673 : } // rank
674 : }
675 0 : }
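// Editorial sketch of the flat per-rank buffer layout produced above (the helper name is hypothetical):
// each cell occupies doublesPerCell consecutive doubles, the first one carrying the global cell index
// cast to double, followed by the payload written via dataExchange.readFromCell():
//
//   [ idx_0, d_0 ... d_{k-1}, idx_1, d_0 ... d_{k-1}, ... ]   with k = doublesPerCell - 1
//
//   inline const double* examplePayloadOfCell(const double* buf, unsigned int n, unsigned int doublesPerCell) {
//     return &buf[n * doublesPerCell + 1]; // skip the leading index slot of the n-th cell block
//   }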
676 :
677 : template <class Cell_T, unsigned int dim>
678 : template <class Container_T>
679 0 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::readFromReceiveBuffer(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange,
680 : const Container_T& cells) {
681 0 : I01 idx;
682 : Cell_T* couplingCell;
683 0 : for (auto pair : cells) {
684 0 : std::tie(couplingCell, idx) = pair;
685 :
686 0 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
687 : using namespace coupling::indexing;
688 :
689 0 : const unsigned int globalIndex = I00{idx}.get();
690 :
691 : // loop over all received buffers
692 0 : bool breakCellLoop = false;
693 0 : for (typename std::map<unsigned int, BufferWithID>::const_iterator it = _receiveBuffer.begin(); !breakCellLoop && it != _receiveBuffer.end(); it++) {
694 :
695 : // loop over all received cells of this buffer
696 : // TODO use std::tie instead of it->first/second
697 0 : const unsigned int bufferSize = (it->second).bufferSize;
698 0 : for (unsigned int i = 0; !breakCellLoop && i < bufferSize; i++) {
699 :
700 : // determine global cell index for this coupling cell
701 0 : const unsigned int thisIndex = (unsigned int)(it->second).buffer[i * doublesPerCell];
702 :
703 : // if this is the correct cell, write data into coupling cell and return
704 0 : if (thisIndex == globalIndex) {
705 : // read receive buffer and write to coupling cell (use specialised function)
706 0 : dataExchange.writeToCell(&((it->second).buffer[i * doublesPerCell + 1]), *couplingCell);
707 : breakCellLoop = true;
708 : }
709 : } // loop over buffer
710 : } // loop over all buffers
711 : }
712 0 : }
713 :
714 : template <class Cell_T, unsigned int dim>
715 0 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::allocateReceiveBuffers(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange, I01 idx) {
716 :
717 0 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
718 :
719 : // determine the ranks from which we need to receive information of this coupling cell
720 0 : const std::vector<unsigned int> ranks = dataExchange.getSourceRanks(idx);
721 0 : const unsigned int ranksSize = (unsigned int)ranks.size();
722 : #if (COUPLING_MD_DEBUG == COUPLING_MD_YES)
723 : std::cout << "Rank " << IDXS.getRank() << ": allocate receive buffers for cell " << idx << ";";
724 : std::cout << " associated source ranks: ";
725 : for (unsigned int i = 0; i < ranksSize; i++) {
726 : std::cout << ranks[i] << " ";
727 : }
728 : std::cout << std::endl;
729 : #endif
730 :
731 : // loop over ranks
732 0 : for (unsigned int i = 0; i < ranksSize; i++) {
733 :
734 : // get iterator to map
735 0 : typename std::map<unsigned int, BufferWithID>::iterator thisBuffer = _receiveBuffer.find(ranks[i]);
736 0 : if (thisBuffer == _receiveBuffer.end()) {
737 0 : _receiveBuffer[ranks[i]] = BufferWithID();
738 0 : thisBuffer = _receiveBuffer.find(ranks[i]);
739 : }
740 :
741 : // increment buffer size and realloc buffer
742 0 : const unsigned int firstPos = (thisBuffer->second).bufferSize * doublesPerCell;
743 0 : (thisBuffer->second).bufferSize++;
744 0 : (thisBuffer->second).buffer = (double*)realloc((thisBuffer->second).buffer, sizeof(double) * (thisBuffer->second).bufferSize * doublesPerCell);
745 0 : if ((thisBuffer->second).buffer == NULL) {
746 0 : std::cout << "ERROR coupling::SendReceiveBuffer::allocateReceiveBuffers(): realloc yields NULL ptr!" << std::endl;
747 0 : exit(EXIT_FAILURE);
748 : }
749 : // set all values to -1.0
750 0 : for (unsigned int j = firstPos; j < firstPos + doublesPerCell; j++) {
751 0 : (thisBuffer->second).buffer[j] = -1.0;
752 : }
753 : } // rank
754 0 : }
755 :
756 : template <class Cell_T, unsigned int dim>
757 0 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::triggerSending(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange) {
758 0 : const unsigned int thisRank = IDXS.getRank();
759 0 : const unsigned int doublesPerCell = dataExchange.getDoublesPerCell() + 1;
760 :
761 : // loop over all ranks
762 : // counter loops over [0; _sendSize-1]
763 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
764 0 : int counter = 0;
765 : #endif
766 :
767 0 : for (typename std::map<unsigned int, BufferWithID>::iterator it = _sendBuffer.begin(); it != _sendBuffer.end(); it++) {
768 : // if this is not the same rank, do sending
769 0 : if ((it->first) != thisRank) {
770 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
771 0 : int err = MPI_Isend((it->second).buffer, doublesPerCell * (it->second).bufferSize, MPI_DOUBLE, (it->first), dataExchange.getTag(), IDXS.getComm(),
772 0 : &_requests[counter]);
773 0 : if (err != MPI_SUCCESS) {
774 0 :         char* str = new char[MPI_MAX_ERROR_STRING];
775 0 : int len = -1;
776 0 : MPI_Error_string(err, str, &len);
777 0 : std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::triggerSending(): Sending from rank " << thisRank << " to " << it->first
778 0 : << " failed with error: " << str << std::endl;
779 :         delete[] str;
780 0 : exit(EXIT_FAILURE);
781 : }
782 0 : counter++;
783 : #endif
784 : // otherwise: copy information to _receiveBuffer
785 : } else {
786 :
787 : // determine receive-buffer for this rank and check that the buffer is available and of correct size
788 0 : typename std::map<unsigned int, BufferWithID>::iterator itRecv = _receiveBuffer.find(thisRank);
789 0 : if ((itRecv == _receiveBuffer.end())) {
790 0 : std::cout << "ERROR coupling::SendReceiveBuffer::triggerSending(): Could not send from rank " << thisRank
791 0 : << " to the same rank! No receive buffer available!";
792 0 : std::cout << " Tag (in case of MPI; not used here): " << dataExchange.getTag() << std::endl;
793 0 : exit(EXIT_FAILURE);
794 : }
795 0 : if ((itRecv->second).bufferSize != (it->second).bufferSize) {
796 0 : std::cout << "ERROR coupling::SendReceiveBuffer:triggerSending(): Send- and receive-buffer sizes do not match for information processed on same rank!"
797 0 : << std::endl;
798 0 : std::cout << "Recv-buffer size=" << (itRecv->second).bufferSize << ", send-buffer size=" << (it->second).bufferSize << std::endl;
799 0 : exit(EXIT_FAILURE);
800 : }
801 :
802 : // copy information from send- to receive buffer
803 0 : memcpy((itRecv->second).buffer, (it->second).buffer, sizeof(double) * doublesPerCell * (it->second).bufferSize);
804 : }
805 : }
806 0 : }
807 :
808 : template <class Cell_T, unsigned int dim>
809 0 : void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::triggerReceiving(coupling::sendrecv::DataExchange<Cell_T, dim>& dataExchange) {
810 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
811 0 : const unsigned int thisRank = IDXS.getRank();
812 :
813 : // loop over all ranks (which are not this rank) and trigger MPI_Irecv
814 : // counter starts at _sendSize; recv-requests are in
815 : // [_sendSize;_sendSize+_recvSize-1]
816 0 : int counter = _sendSize;
817 0 : for (typename std::map<unsigned int, BufferWithID>::iterator it = _receiveBuffer.begin(); it != _receiveBuffer.end(); it++) {
818 0 : if ((it->first) != thisRank) {
819 0 : int err = MPI_Irecv((it->second).buffer, (1 + dataExchange.getDoublesPerCell()) * (it->second).bufferSize, MPI_DOUBLE, (it->first), dataExchange.getTag(),
820 0 : IDXS.getComm(), &_requests[counter]);
821 0 : if (err != MPI_SUCCESS) {
822 0 :       char* str = new char[MPI_MAX_ERROR_STRING];
823 0 : int len = -1;
824 0 : MPI_Error_string(err, str, &len);
825 0 : std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::triggerReceiving(): Receiving on rank " << thisRank << " from " << it->first
826 0 : << " failed with error: " << str << std::endl;
827 :       delete[] str;
828 0 : exit(EXIT_FAILURE);
829 : }
830 0 : counter++;
831 : }
832 : }
833 : #endif
834 0 : }
835 :
836 0 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::allocateRequests() {
837 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
838 0 : if (_requestsAllocated) {
839 0 : std::cout << "ERROR coupling::SendReceiveBuffer::allocateRequests(): allocateRequests() called although allocation already took place!" << std::endl;
840 0 : exit(EXIT_FAILURE);
841 : }
842 0 : if (_requests != NULL) {
843 0 : delete[] _requests;
844 0 : _requests = NULL;
845 : }
846 0 : const unsigned int thisRank = IDXS.getRank();
847 :
848 : // determine number of MPI requests; no requests for on-rank operations
849 0 : _receiveSize = _receiveBuffer.size();
850 0 : _sendSize = _sendBuffer.size();
851 0 : if (_sendBuffer.find(thisRank) != _sendBuffer.end()) {
852 0 : _sendSize--;
853 : }
854 0 : if (_receiveBuffer.find(thisRank) != _receiveBuffer.end()) {
855 0 : _receiveSize--;
856 : }
857 :   // no-op, i.e. no request allocation, if there are no MPI transfers
858 0 : if (_receiveSize + _sendSize == 0) {
859 0 : return;
860 : }
861 :
862 0 : _requests = new MPI_Request[_receiveSize + _sendSize];
863 0 : if (_requests == NULL) {
864 0 : std::cout << "ERROR coupling::SendReceiveBuffer::allocateRequests(): _requests==NULL!" << std::endl;
865 0 : exit(EXIT_FAILURE);
866 : }
867 :
868 0 : _requestsAllocated = true;
869 : #endif
870 : }
871 :
872 0 : template <class Cell_T, unsigned int dim> void coupling::sendrecv::SendReceiveBuffer<Cell_T, dim>::waitAllOperations() {
873 : #if (COUPLING_MD_PARALLEL == COUPLING_MD_YES)
874 : // if no requests have been allocated, return immediately
875 0 : if (!_requestsAllocated && (_requests == NULL)) {
876 : return;
877 : }
878 0 : if (_requests == NULL) {
879 0 : std::cout << "ERROR coupling::SendReceiveBuffer::waitAllOperations(): _requests==NULL!" << std::endl;
880 0 : exit(EXIT_FAILURE);
881 : }
882 0 : int err = MPI_Waitall(_sendSize + _receiveSize, _requests, MPI_STATUSES_IGNORE);
883 0 : if (err != MPI_SUCCESS) {
884 0 :     char* str = new char[MPI_MAX_ERROR_STRING];
885 0 : int len = -1;
886 0 : MPI_Error_string(err, str, &len);
887 0 : std::cout << "ERROR coupling::sendrecv::SendRecvBuffer::waitAllOperations(): Waiting failed with error: " << str << std::endl;
888 :     delete[] str;
889 0 : exit(EXIT_FAILURE);
890 : }
891 0 : delete[] _requests;
892 0 : for (MPI_Comm& subcomm : _subComms) {
893 0 : if (subcomm != MPI_COMM_NULL) {
894 0 : MPI_Comm_free(&subcomm);
895 0 : subcomm = MPI_COMM_NULL;
896 : }
897 : }
898 0 : for (MPI_Group& subgroup : _subGroups) {
899 0 : if (subgroup != MPI_GROUP_NULL) {
900 0 : MPI_Group_free(&subgroup);
901 0 : subgroup = MPI_GROUP_NULL;
902 : }
903 : }
904 0 : _requests = NULL;
905 0 : _requestsAllocated = false;
906 : #endif
907 : }
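// Editorial note: a plausible driving sequence for the point-to-point path above, assuming the caller
// (typically a derived sendrecv class) performs the steps in this order; the actual orchestration may differ:
//
//   allocateReceiveBuffers(dataExchange, idx);   // size receive buffers per source rank (per cell)
//   writeToSendBuffer(dataExchange, cells);      // pack the cells per target rank
//   allocateRequests();                          // one MPI_Request per off-rank send/recv
//   triggerReceiving(dataExchange);              // post MPI_Irecv for every off-rank source
//   triggerSending(dataExchange);                // post MPI_Isend; on-rank data is memcpy'ed directly
//   waitAllOperations();                         // MPI_Waitall over all posted requests
//   readFromReceiveBuffer(dataExchange, cells);  // scatter received values back into the coupling cells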
908 : #pragma endregion // sequential operations