diff --git a/lib/common/Pool_par.chpl b/lib/common/Pool_par.chpl
index aa1b7865cfd352963a6014a50036a8cc2415487d..22a920e938e5a08c19486230159c0b77b14c6ebc 100644
--- a/lib/common/Pool_par.chpl
+++ b/lib/common/Pool_par.chpl
@@ -25,6 +25,18 @@ module Pool_par
       this.lock = false;
     }
 
+    // Blocks until the calling task owns this deque's lock (false -> true).
+    // Between failed attempts it waits on the flag instead of hammering CAS.
+    proc ref acquireLock() {
+      while !this.lock.compareAndSwap(false, true) {
+        this.lock.waitFor(false);
+      }
+    }
+
+    // Releases the lock by resetting the flag to false. It does not check
+    // that the calling task is the one that acquired it.
+    proc ref releaseLock() { this.lock.write(false); }
+
     // Parallel-safe insertion to the end of the deque.
     proc ref pushBack(node: eltType) {
       while true {
diff --git a/nqueens_multigpu_chpl.chpl b/nqueens_multigpu_chpl.chpl
index 8c5f302ed75e4dfd4226b401b9f6c4be3854c595..d5268d017d699abe04de9aeaac4974ff602a98e4 100644
--- a/nqueens_multigpu_chpl.chpl
+++ b/nqueens_multigpu_chpl.chpl
@@ -126,6 +126,8 @@ proc evaluate_gpu(const parents_d: [] Node, const size, ref labels_d)
 proc generate_children(const ref parents: [] Node, const size: int, const ref labels: [] uint(8),
   ref exploredTree: uint, ref exploredSol: uint, ref pool)
 {
+  pool.acquireLock();
+
   for i in 0..#size  {
     const parent = parents[i];
     const depth = parent.depth;
@@ -139,11 +141,13 @@ proc generate_children(const ref parents: [] Node, const size: int, const ref la
         child.depth = depth + 1;
         child.board = parent.board;
         child.board[depth] <=> child.board[j];
-        pool.pushBack(child);
+        pool.pushBackFree(child);
         exploredTree += 1;
       }
     }
   }
+
+  pool.releaseLock();
 }
 
 // Multi-GPU N-Queens search.
@@ -318,14 +322,11 @@ proc nqueens_search(ref exploredTree: uint, ref exploredSol: uint, ref elapsedTi
       }
     }
 
-    if lock.compareAndSwap(false, true) {
-      const poolLocSize = pool_loc.size;
-      for p in 0..#poolLocSize {
-        var hasWork = 0;
-        pool.pushBack(pool_loc.popBack(hasWork));
-        if !hasWork then break;
-      }
-      lock.write(false);
+    const poolLocSize = pool_loc.size;
+    for p in 0..#poolLocSize {
+      var hasWork = 0;
+      pool.pushBack(pool_loc.popBack(hasWork));
+      if !hasWork then break;
     }
 
     eachExploredTree[gpuID] = tree;