Skip to content

Commit f74a330

Browse files
authored
Link id update (#1569)
* Modernize sst_types.h and add bit_util namespace for functions and variables used to manipulate bits in the various IDs used in the core. * Remove order field from ConfigLink and change remaining variable names to add a trailing underscore per the coding standards. * Change LinkId_t to be 64 bits, make it globally unique, and store the ID in the Link object. This added an extra exchange between ConfigGraphs created on different ranks during a parallel load. * Changed the exchange of link pointers from using names to using link IDs. Also removed the std::map currently used for the exchange and replaced it with std::vector. The vector is sorted and compared item by item to the data sent from the remote rank, removing the need to do a bunch of lookups in the map. This saves both memory and compute time. * Change Link ID exchange to serialize data directly into the send buffer and deserialize one link at a time to save memory. Also switch to non-blocking send/recv to avoid deadlocks.
1 parent 0cc2be4 commit f74a330

31 files changed

+942
-497
lines changed

src/sst/core/impl/partitioners/simplepart.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ SimplePartitioner::performPartition(PartitionGraph* graph)
235235
// ConfigLink* theLink = (*linkItr);
236236
PartitionLink& theLink = linkMap[*linkItr];
237237
compConnectMap->insert(
238-
std::pair<ComponentId_t, SimTime_t>(theLink.component[1], theLink.getMinLatency()));
238+
std::pair<ComponentId_t, SimTime_t>(theLink.component_[1], theLink.getMinLatency()));
239239
}
240240
}
241241

src/sst/core/link.cc

Lines changed: 81 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
8080
// Need a pointer to my simulation object
8181
Simulation_impl* sim = Simulation_impl::getSimulation();
8282

83-
// In order to uniquely identify links on restart, we need to
83+
// For restarts that use the same parallelism, we need to
8484
// track the rank of the link and its pair link. For regular
8585
// links, they are the same, but for sync link pairs, the pair
8686
// link will be on a different rank. For self links, this
@@ -109,55 +109,24 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
109109

110110
SST_SER(type);
111111

112-
/*
113-
Unique Identifiers
114-
115-
For non-selflinks, we need to be able to create a unique
116-
identifier so we can connect the pairs on restart. The
117-
unique identifiers are created using the MPI rank and point
118-
of the link cast as a uintptr_t.
119-
120-
For regular links, we only store the rank once since both
121-
links in the pair are on the same rank.
122-
123-
For SYNC links, the local link only knows the remote link by
124-
it's pair link, so we will use that pointer for the unique
125-
ID.
126-
127-
For self links, no rank info is stored since we don't need
128-
to create a unique ID
129-
*/
130112
if ( type == SYNC || type == REG ) {
131113
SST_SER(my_rank);
132114

133-
uintptr_t ptr;
134-
if ( type == SYNC )
135-
ptr = reinterpret_cast<uintptr_t>(s->pair_link);
136-
else
137-
ptr = reinterpret_cast<uintptr_t>(s);
138-
139-
SST_SER(ptr);
140-
141115
if ( type == SYNC ) {
142-
// The unique ID for the remote links is constructed from
143-
// the rank of the remote pair link and its pointer on
144-
// that rank. The remote pointer is stored in
145-
// delivery_info and we can get the remote rank from the
146-
// sync queue.
116+
// Get rank for pair
147117
SyncQueue* q = dynamic_cast<SyncQueue*>(s->send_queue);
148118
pair_rank = q->getToRank();
149119
SST_SER(pair_rank);
150-
SST_SER(s->delivery_info);
151-
}
152-
else {
153-
// Unique ID for my pair link is my rank and pair_link
154-
// pointer. Rank is already stored, just store pair
155-
// pointer
156-
uintptr_t pair_ptr = reinterpret_cast<uintptr_t>(s->pair_link);
157-
SST_SER(pair_ptr);
158120
}
159121
} // if ( type == SYNC || type == REG )
160122

123+
124+
// Serialize the ID for the Link
125+
if ( !s->has_tool_list )
126+
SST_SER(s->id);
127+
else
128+
SST_SER((*s->attached_tools)[0].second);
129+
161130
/*
162131
Store the metadata for this link
163132
*/
@@ -217,24 +186,31 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
217186

218187
// Determine how many serializable tools there are
219188
Link::ToolList tools;
220-
if ( s->attached_tools ) {
221-
for ( auto x : *s->attached_tools ) {
222-
if ( dynamic_cast<SST::Core::Serialization::serializable*>(x.first) ) {
223-
tools.push_back(x);
189+
190+
if ( s->has_tool_list ) {
191+
for ( auto x = ++s->attached_tools->begin(); x != s->attached_tools->end(); ++x ) {
192+
if ( dynamic_cast<SST::Core::Serialization::serializable*>(x->first) ) {
193+
tools.push_back(*x);
224194
}
225195
}
226196
}
227197
size_t tool_count = tools.size();
228-
SST_SER(tool_count);
229-
if ( tool_count > 0 ) {
230-
// Serialize each tool, then call
231-
// serializeEventAttachPointKey() to serialize any data
232-
// associated with the key
233-
for ( auto x : tools ) {
234-
SST::Core::Serialization::serializable* obj =
235-
dynamic_cast<SST::Core::Serialization::serializable*>(x.first);
236-
SST_SER(obj);
237-
x.first->serializeEventAttachPointKey(ser, x.second);
198+
199+
// Need to determine if we'll have any tools attached on restart. We only have tools when tool_count > 0
200+
bool restart_tools = (tool_count > 0);
201+
SST_SER(restart_tools);
202+
203+
if ( restart_tools ) {
204+
SST_SER(tool_count);
205+
if ( tool_count > 0 ) {
206+
// Serialize each tool, then call serializeEventAttachPointKey() to serialize any data associated with
207+
// the key
208+
for ( auto x : tools ) {
209+
SST::Core::Serialization::serializable* obj =
210+
dynamic_cast<SST::Core::Serialization::serializable*>(x.first);
211+
SST_SER(obj);
212+
x.first->serializeEventAttachPointKey(ser, x.second);
213+
}
238214
}
239215
}
240216

@@ -265,35 +241,28 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
265241
*/
266242
bool is_orig_sync = (type == 3);
267243

268-
/*
269-
Unique identifiers
270-
271-
Get the ranks and tags for this link and its pair link
272-
*/
273244
RankInfo my_restart_rank = sim->getRank();
274245
RankInfo pair_restart_rank = my_restart_rank;
275246

276-
uintptr_t my_tag;
277-
uintptr_t pair_tag;
278247

279248
if ( type == SYNC || type == REG ) {
280249
SST_SER(my_rank);
281-
SST_SER(my_tag);
282250

283251
if ( type == SYNC )
284252
SST_SER(pair_rank);
285253
else
286254
pair_rank = my_rank;
287-
288-
SST_SER(pair_tag);
289255
}
290256

291257

258+
LinkId_t link_id;
259+
SST_SER(link_id);
260+
292261
/*
293262
Determine current sync state
294263
*/
295264
if ( type != SELF ) {
296-
pair_restart_rank = sim->getRankForLinkOnRestart(pair_rank, pair_tag);
265+
pair_restart_rank = sim->getRankForLinkOnRestart(pair_rank, link_id);
297266

298267
// If pair_restart_rank.rank == UNASSIGNED, then we have
299268
// the same partitioning as the checkpoint and the ranks
@@ -303,6 +272,7 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
303272

304273
bool is_restart_sync = (my_restart_rank != pair_restart_rank);
305274

275+
306276
/*
307277
Create or get link from tracker
308278
@@ -315,14 +285,12 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
315285
ser.unpacker().report_new_pointer(reinterpret_cast<uintptr_t>(s));
316286
}
317287
else {
318-
auto& link_tracker = sim->link_restart_tracking;
319-
std::pair<int, uintptr_t> my_unique_id = std::make_pair(my_rank.rank, my_tag);
320-
std::pair<int, uintptr_t> pair_unique_id = std::make_pair(pair_rank.rank, pair_tag);
288+
auto& link_tracker = sim->link_restart_tracking;
321289

322-
if ( !is_restart_sync && link_tracker.count(my_unique_id) ) {
290+
if ( !is_restart_sync && link_tracker.count(link_id) ) {
323291
// Get my link and erase it from the map
324-
s = link_tracker[my_unique_id];
325-
link_tracker.erase(my_unique_id);
292+
s = link_tracker[link_id];
293+
link_tracker.erase(link_id);
326294
}
327295
else {
328296
// Create a link pair and set s to the left link
@@ -335,10 +303,12 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
335303
s->pair_link->setLatency(0);
336304

337305
// Put my pair link in the tracking map
338-
link_tracker[pair_unique_id] = s->pair_link;
306+
link_tracker[link_id] = s->pair_link;
339307
}
340308
}
341309

310+
s->id = link_id;
311+
342312
/*
343313
Get the metadata for the link
344314
*/
@@ -401,13 +371,20 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
401371
s->pair_link->latency += latency;
402372
}
403373

404-
/*
405-
Restore attached tools
406-
*/
407-
size_t tool_count;
408-
SST_SER(tool_count);
409-
if ( tool_count > 0 ) {
410-
s->attached_tools = new Link::ToolList();
374+
SST_SER(s->has_tool_list);
375+
376+
if ( s->has_tool_list ) {
377+
/*
378+
Restore attached tools
379+
*/
380+
size_t tool_count;
381+
SST_SER(tool_count);
382+
383+
// If has_tool_list is true, then tool_count is greater than 0
384+
Link::ToolList* tools = new Link::ToolList();
385+
tools->emplace_back(nullptr, s->id);
386+
s->attached_tools = tools;
387+
s->has_tool_list = true;
411388
for ( size_t i = 0; i < tool_count; ++i ) {
412389
SST::Core::Serialization::serializable* tool;
413390
uintptr_t key;
@@ -417,9 +394,7 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
417394
s->attached_tools->emplace_back(ap, key);
418395
}
419396
}
420-
else {
421-
s->attached_tools = nullptr;
422-
}
397+
423398

424399
/*
425400
Deserialize the events targeting this link
@@ -440,17 +415,15 @@ SST::Core::Serialization::serialize_impl<Link*>::operator()(Link*& s, serializer
440415
s->pair_link->tag = s->tag;
441416

442417
s->pair_link->defaultTimeBase = 1;
418+
s->pair_link->id = s->getId();
443419

444-
// Need to register with the SyncManager, but first
445-
// need to create a unique name
446-
std::string uname = s->createUniqueGlobalLinkName(my_rank, my_tag, pair_rank, pair_tag);
447-
ActivityQueue* sync_q =
448-
sim->syncManager->registerLink(pair_restart_rank, my_restart_rank, uname, s->pair_link);
449-
s->send_queue = sync_q;
420+
// Need to register with the SyncManager
421+
ActivityQueue* sync_q = sim->syncManager->registerLink(pair_restart_rank, my_restart_rank, s->pair_link);
422+
s->send_queue = sync_q;
450423
}
451424
} break;
452425
case serializer::MAP:
453-
// TODO: Implement Link mapping mode
426+
// No current plans to make Links mappable
454427
break;
455428
}
456429
}
@@ -477,7 +450,7 @@ class NullEvent : public Event
477450
};
478451

479452

480-
Link::Link(LinkId_t tag) :
453+
Link::Link(LinkId_t id) :
481454
send_queue(nullptr),
482455
delivery_info(0),
483456
defaultTimeBase(0),
@@ -486,8 +459,8 @@ Link::Link(LinkId_t tag) :
486459
current_time(Simulation_impl::getSimulation()->currentSimCycle),
487460
type(UNINITIALIZED),
488461
mode(INIT),
489-
tag(tag),
490-
attached_tools(nullptr)
462+
tag(0),
463+
id(id)
491464
{}
492465

493466
Link::Link() :
@@ -499,8 +472,7 @@ Link::Link() :
499472
current_time(Simulation_impl::getSimulation()->currentSimCycle),
500473
type(UNINITIALIZED),
501474
mode(INIT),
502-
tag(type_max<uint32_t>),
503-
attached_tools(nullptr)
475+
tag(bit_util::type_max<uint32_t>)
504476
{}
505477

506478
Link::~Link()
@@ -514,7 +486,7 @@ Link::~Link()
514486
if ( SYNC == pair_link->type ) delete pair_link;
515487
}
516488

517-
if ( attached_tools ) delete attached_tools;
489+
if ( has_tool_list ) delete attached_tools;
518490
}
519491

520492
void
@@ -683,9 +655,10 @@ Link::send_impl(SimTime_t delay, Event* event)
683655
event->addRecvComponent(pair_link->comp, pair_link->ctype, pair_link->port);
684656
#endif
685657

686-
if ( attached_tools ) {
687-
for ( auto& x : *attached_tools ) {
688-
x.first->eventSent(x.second, event);
658+
if ( has_tool_list ) {
659+
// First entry just holds the Link id, so we can skip it
660+
for ( auto x = ++attached_tools->begin(); x != attached_tools->end(); ++x ) {
661+
x->first->eventSent(x->second, event);
689662
// Check to see if the event was deleted. If so, return.
690663
if ( nullptr == event ) return;
691664
}
@@ -840,17 +813,23 @@ Link::createUniqueGlobalLinkName(RankInfo local_rank, uintptr_t local_ptr, RankI
840813
void
841814
Link::attachTool(AttachPoint* tool, const AttachPointMetaData& mdata)
842815
{
843-
if ( !attached_tools ) attached_tools = new ToolList();
816+
if ( !has_tool_list ) {
817+
auto tools = new ToolList();
818+
tools->emplace_back(nullptr, id);
819+
attached_tools = tools;
820+
has_tool_list = true;
821+
}
844822
auto key = tool->registerLinkAttachTool(mdata);
845-
attached_tools->push_back(std::make_pair(tool, key));
823+
attached_tools->emplace_back(tool, key);
846824
}
847825

848826
void
849827
Link::detachTool(AttachPoint* tool)
850828
{
851-
if ( !attached_tools ) return;
829+
if ( !has_tool_list ) return;
852830

853-
for ( auto x = attached_tools->begin(); x != attached_tools->end(); ++x ) {
831+
// First entry just holds the Link id, so we can skip it
832+
for ( auto x = ++attached_tools->begin(); x != attached_tools->end(); ++x ) {
854833
if ( x->first == tool ) {
855834
attached_tools->erase(x);
856835
break;

0 commit comments

Comments
 (0)