diff --git a/nqueens_multigpu_chpl.chpl b/nqueens_multigpu_chpl.chpl
index b2fc6d461d1c0971c449ebd0ca5a5b6be3a1a6c1..87fe687aad008d42473ab744fb312d764264f9fe 100644
--- a/nqueens_multigpu_chpl.chpl
+++ b/nqueens_multigpu_chpl.chpl
@@ -5,7 +5,7 @@
 use Time;
 
 use util;
-use Pool;
+use Pool_par;
 use GpuDiagnostics;
 
 use NQueens_node;
@@ -74,7 +74,7 @@ proc isSafe(const board, const queen_num, const row_pos): uint(8)
 }
 
 // Evaluate and generate children nodes on CPU.
-proc decompose(const parent: Node, ref tree_loc: uint, ref num_sol: uint, ref pool: SinglePool(Node))
+proc decompose(const parent: Node, ref tree_loc: uint, ref num_sol: uint, ref pool)
 {
   const depth = parent.depth;
 
@@ -124,7 +124,7 @@ proc evaluate_gpu(const parents_d: [] Node, const size, ref labels_d)
 
 // Generate children nodes (evaluated on GPU) on CPU.
 proc generate_children(const ref parents: [] Node, const size: int, const ref labels: [] uint(8),
-  ref exploredTree: uint, ref exploredSol: uint, ref pool: SinglePool(Node))
+  ref exploredTree: uint, ref exploredSol: uint, ref pool)
 {
   for i in 0..#size  {
     const parent = parents[i];
@@ -151,7 +151,7 @@ proc nqueens_search(ref exploredTree: uint, ref exploredSol: uint, ref elapsedTi
 {
   var root = new Node(N);
 
-  var pool = new SinglePool(Node);
+  var pool = new SinglePool_par(Node);
 
   pool.pushBack(root);
 
@@ -164,7 +164,7 @@ proc nqueens_search(ref exploredTree: uint, ref exploredSol: uint, ref elapsedTi
   timer.start();
   while (pool.size < D*m) {
     var hasWork = 0;
-    var parent = pool.popFront(hasWork);
+    var parent = pool.popFrontFree(hasWork);
     if !hasWork then break;
 
     decompose(parent, exploredTree, exploredSol, pool);
@@ -192,12 +192,15 @@ proc nqueens_search(ref exploredTree: uint, ref exploredSol: uint, ref elapsedTi
   pool.front = 0;
   pool.size = 0;
 
-  coforall gpuID in 0..#D with (ref pool, ref eachExploredTree, ref eachExploredSol) {
+  var multiPool: [0..#D] SinglePool_par(Node);
+
+  coforall gpuID in 0..#D with (ref pool, ref eachExploredTree, ref eachExploredSol,
+    ref multiPool) {
 
     const device = here.gpus[gpuID];
 
     var tree, sol: uint;
-    var pool_loc = new SinglePool(Node);
+    ref pool_loc = multiPool[gpuID];
 
     // each task gets its chunk
     pool_loc.elements[0..#c] = pool.elements[gpuID+f.. by D #c];
@@ -217,15 +220,16 @@ proc nqueens_search(ref exploredTree: uint, ref exploredSol: uint, ref elapsedTi
       /*
         Each task gets its parents nodes from the pool.
       */
-      var poolSize = pool_loc.size;
-      if (poolSize >= m) {
-        poolSize = min(poolSize, M);
+      var poolSize = pool_loc.popBackBulk(m, M, parents);
+
+      if (poolSize > 0) {
+        /* poolSize = min(poolSize, M);
 
         for i in 0..#poolSize {
           var hasWork = 0;
           parents[i] = pool_loc.popBack(hasWork);
           if !hasWork then break;
-        }
+        } */
 
         const numLabels = N * poolSize;
 
@@ -277,7 +281,7 @@ proc nqueens_search(ref exploredTree: uint, ref exploredSol: uint, ref elapsedTi
   timer.start();
   while true {
     var hasWork = 0;
-    var parent = pool.popBack(hasWork);
+    var parent = pool.popBackFree(hasWork);
     if !hasWork then break;
 
     decompose(parent, exploredTree, exploredSol, pool);