diff --git a/lib/common/Pool_par.chpl b/lib/common/Pool_par.chpl
index aa1b7865cfd352963a6014a50036a8cc2415487d..22a920e938e5a08c19486230159c0b77b14c6ebc 100644
--- a/lib/common/Pool_par.chpl
+++ b/lib/common/Pool_par.chpl
@@ -25,6 +25,22 @@ module Pool_par
       this.lock = false;
     }
 
+    // Acquires the pool's lock, busy-waiting until the compare-and-swap from
+    // `false` to `true` succeeds. Pair every call with `releaseLock()`.
+    proc ref acquireLock() {
+      while true {
+        if this.lock.compareAndSwap(false, true) {
+          return;
+        }
+      }
+    }
+
+    // Releases the pool's lock by writing `false` to it. The write is
+    // unconditional, so call it only while holding the lock.
+    proc ref releaseLock() {
+      this.lock.write(false);
+    }
+
     // Parallel-safe insertion to the end of the deque.
     proc ref pushBack(node: eltType) {
       while true {
diff --git a/nqueens_multigpu_chpl.chpl b/nqueens_multigpu_chpl.chpl
index 8c5f302ed75e4dfd4226b401b9f6c4be3854c595..d5268d017d699abe04de9aeaac4974ff602a98e4 100644
--- a/nqueens_multigpu_chpl.chpl
+++ b/nqueens_multigpu_chpl.chpl
@@ -126,6 +126,10 @@ proc evaluate_gpu(const parents_d: [] Node, const size, ref labels_d)
 proc generate_children(const ref parents: [] Node, const size: int, const ref labels: [] uint(8),
   ref exploredTree: uint, ref exploredSol: uint, ref pool)
 {
+  // Hold the pool's lock for the whole batch so that each child can be
+  // appended with `pushBackFree`, which is not parallel-safe on its own.
+  pool.acquireLock();
+
   for i in 0..#size {
     const parent = parents[i];
     const depth = parent.depth;
@@ -139,11 +143,13 @@ proc generate_children(const ref parents: [] Node, const size: int, const ref la
         child.depth = depth + 1;
         child.board = parent.board;
         child.board[depth] <=> child.board[j];
-        pool.pushBack(child);
+        pool.pushBackFree(child);
         exploredTree += 1;
       }
     }
   }
+
+  pool.releaseLock();
 }
 
 // Multi-GPU N-Queens search.
@@ -318,14 +324,13 @@ proc nqueens_search(ref exploredTree: uint, ref exploredSol: uint, ref elapsedTi
       }
     }
 
-    if lock.compareAndSwap(false, true) {
-      const poolLocSize = pool_loc.size;
-      for p in 0..#poolLocSize {
-        var hasWork = 0;
-        pool.pushBack(pool_loc.popBack(hasWork));
-        if !hasWork then break;
-      }
-      lock.write(false);
+    // Move the nodes left in `pool_loc` into `pool`. No outer lock is taken
+    // here; this relies on `pool.pushBack` doing its own locking.
+    const poolLocSize = pool_loc.size;
+    for p in 0..#poolLocSize {
+      var hasWork = 0;
+      pool.pushBack(pool_loc.popBack(hasWork));
+      if !hasWork then break;
     }
 
     eachExploredTree[gpuID] = tree;