Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
GPU Accelerated Tree Search Chapel
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
UltraBO
GPU Accelerated Tree Search Chapel
Commits
1fe780df
Commit
1fe780df
authored
2 months ago
by
Guillaume-Helbecque
Browse files
Options
Downloads
Patches
Plain Diff
optimize few things
parent
9e7f956d
Branches
refactor-intra-ws-pfsp
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
pfsp_chpl.chpl
+0
-1
0 additions, 1 deletion
pfsp_chpl.chpl
pfsp_gpu_chpl.chpl
+3
-3
3 additions, 3 deletions
pfsp_gpu_chpl.chpl
pfsp_multigpu_chpl.chpl
+17
-18
17 additions, 18 deletions
pfsp_multigpu_chpl.chpl
with
20 additions
and
22 deletions
pfsp_chpl.chpl
+
0
−
1
View file @
1fe780df
...
@@ -6,7 +6,6 @@ use Time;
...
@@ -6,7 +6,6 @@ use Time;
use util;
use util;
use Pool;
use Pool;
use PFSP_node;
use PFSP_node;
use Bound_johnson;
use Bound_johnson;
use Bound_simple;
use Bound_simple;
...
...
This diff is collapsed.
Click to expand it.
pfsp_gpu_chpl.chpl
+
3
−
3
View file @
1fe780df
...
@@ -218,7 +218,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const
...
@@ -218,7 +218,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const
@assertOnGpu
@assertOnGpu
foreach parentId in 0..#(size/jobs) {
foreach parentId in 0..#(size/jobs) {
var parent = parents_d[parentId];
var parent = parents_d[parentId];
const depth = parent.depth;
/*
const depth = parent.depth;
*/
var prmu = parent.prmu;
var prmu = parent.prmu;
var lb_begin: MAX_JOBS*int(32);
var lb_begin: MAX_JOBS*int(32);
...
@@ -289,10 +289,10 @@ proc generate_children(const ref parents: [] Node, const size: int, const ref bo
...
@@ -289,10 +289,10 @@ proc generate_children(const ref parents: [] Node, const size: int, const ref bo
} else { // if not leaf
} else { // if not leaf
if (lowerbound < best) { // if child feasible
if (lowerbound < best) { // if child feasible
var child = new Node();
var child = new Node();
child.depth =
parent.
depth + 1;
child.depth = depth + 1;
child.limit1 = parent.limit1 + 1;
child.limit1 = parent.limit1 + 1;
child.prmu = parent.prmu;
child.prmu = parent.prmu;
child.prmu[
parent.
depth] <=> child.prmu[j];
child.prmu[depth] <=> child.prmu[j];
pool.pushBack(child);
pool.pushBack(child);
exploredTree += 1;
exploredTree += 1;
...
...
This diff is collapsed.
Click to expand it.
pfsp_multigpu_chpl.chpl
+
17
−
18
View file @
1fe780df
...
@@ -107,7 +107,7 @@ proc decompose_lb1(const lb1_data, const parent: Node, ref tree_loc: uint, ref n
...
@@ -107,7 +107,7 @@ proc decompose_lb1(const lb1_data, const parent: Node, ref tree_loc: uint, ref n
}
}
} else { // if not leaf
} else { // if not leaf
if (lowerbound < best) { // if child feasible
if (lowerbound < best) { // if child feasible
pool.pushBack(child);
pool.pushBack
Free
(child);
tree_loc += 1;
tree_loc += 1;
}
}
}
}
...
@@ -115,7 +115,7 @@ proc decompose_lb1(const lb1_data, const parent: Node, ref tree_loc: uint, ref n
...
@@ -115,7 +115,7 @@ proc decompose_lb1(const lb1_data, const parent: Node, ref tree_loc: uint, ref n
}
}
proc decompose_lb1_d(const lb1_data, const parent: Node, ref tree_loc: uint, ref num_sol: uint,
proc decompose_lb1_d(const lb1_data, const parent: Node, ref tree_loc: uint, ref num_sol: uint,
ref best: int, ref pool
: SinglePool_par(Node)
)
ref best: int, ref pool)
{
{
var lb_begin: MAX_JOBS*int(32);
var lb_begin: MAX_JOBS*int(32);
...
@@ -139,7 +139,7 @@ proc decompose_lb1_d(const lb1_data, const parent: Node, ref tree_loc: uint, ref
...
@@ -139,7 +139,7 @@ proc decompose_lb1_d(const lb1_data, const parent: Node, ref tree_loc: uint, ref
child.prmu = parent.prmu;
child.prmu = parent.prmu;
child.prmu[parent.depth] <=> child.prmu[i];
child.prmu[parent.depth] <=> child.prmu[i];
pool.pushBack(child);
pool.pushBack
Free
(child);
tree_loc += 1;
tree_loc += 1;
}
}
}
}
...
@@ -147,7 +147,7 @@ proc decompose_lb1_d(const lb1_data, const parent: Node, ref tree_loc: uint, ref
...
@@ -147,7 +147,7 @@ proc decompose_lb1_d(const lb1_data, const parent: Node, ref tree_loc: uint, ref
}
}
proc decompose_lb2(const lb1_data, const lb2_data, const parent: Node, ref tree_loc: uint,
proc decompose_lb2(const lb1_data, const lb2_data, const parent: Node, ref tree_loc: uint,
ref num_sol: uint, ref best: int, ref pool
: SinglePool_par(Node)
)
ref num_sol: uint, ref best: int, ref pool)
{
{
for i in parent.limit1+1..(jobs-1) {
for i in parent.limit1+1..(jobs-1) {
var child = new Node();
var child = new Node();
...
@@ -166,7 +166,7 @@ proc decompose_lb2(const lb1_data, const lb2_data, const parent: Node, ref tree_
...
@@ -166,7 +166,7 @@ proc decompose_lb2(const lb1_data, const lb2_data, const parent: Node, ref tree_
}
}
} else { // if not leaf
} else { // if not leaf
if (lowerbound < best) { // if child feasible
if (lowerbound < best) { // if child feasible
pool.pushBack(child);
pool.pushBack
Free
(child);
tree_loc += 1;
tree_loc += 1;
}
}
}
}
...
@@ -220,7 +220,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const
...
@@ -220,7 +220,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const
@assertOnGpu
@assertOnGpu
foreach parentId in 0..#(size/jobs) {
foreach parentId in 0..#(size/jobs) {
var parent = parents_d[parentId];
var parent = parents_d[parentId];
const depth = parent.depth;
/*
const depth = parent.depth;
*/
var prmu = parent.prmu;
var prmu = parent.prmu;
var lb_begin: MAX_JOBS*int(32);
var lb_begin: MAX_JOBS*int(32);
...
@@ -273,8 +273,10 @@ proc evaluate_gpu(const parents_d: [] Node, const size, const best, const lbound
...
@@ -273,8 +273,10 @@ proc evaluate_gpu(const parents_d: [] Node, const size, const best, const lbound
// Generate children nodes (evaluated by GPU) on CPU.
// Generate children nodes (evaluated by GPU) on CPU.
proc generate_children(const ref parents: [] Node, const size: int, const ref bounds: [] int(32),
proc generate_children(const ref parents: [] Node, const size: int, const ref bounds: [] int(32),
ref exploredTree: uint, ref exploredSol: uint, ref best: int, ref pool
: SinglePool_par(Node)
)
ref exploredTree: uint, ref exploredSol: uint, ref best: int, ref pool)
{
{
pool.acquireLock();
for i in 0..#size {
for i in 0..#size {
const parent = parents[i];
const parent = parents[i];
const depth = parent.depth;
const depth = parent.depth;
...
@@ -291,17 +293,19 @@ proc generate_children(const ref parents: [] Node, const size: int, const ref bo
...
@@ -291,17 +293,19 @@ proc generate_children(const ref parents: [] Node, const size: int, const ref bo
} else { // if not leaf
} else { // if not leaf
if (lowerbound < best) { // if child feasible
if (lowerbound < best) { // if child feasible
var child = new Node();
var child = new Node();
child.depth =
parent.
depth + 1;
child.depth = depth + 1;
child.limit1 = parent.limit1 + 1;
child.limit1 = parent.limit1 + 1;
child.prmu = parent.prmu;
child.prmu = parent.prmu;
child.prmu[
parent.
depth] <=> child.prmu[j];
child.prmu[depth] <=> child.prmu[j];
pool.pushBack(child);
pool.pushBack
Free
(child);
exploredTree += 1;
exploredTree += 1;
}
}
}
}
}
}
}
}
pool.releaseLock();
}
}
// Multi-GPU PFSP search.
// Multi-GPU PFSP search.
...
@@ -314,9 +318,6 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -314,9 +318,6 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
var pool = new SinglePool_par(Node);
var pool = new SinglePool_par(Node);
pool.pushBack(root);
pool.pushBack(root);
var allTasksIdleFlag: atomic bool = false;
var eachTaskState: [0..#D] atomic bool = BUSY; // one task per GPU
var timer: stopwatch;
var timer: stopwatch;
/*
/*
...
@@ -358,12 +359,13 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -358,12 +359,13 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
var eachExploredTree, eachExploredSol: [0..#D] uint = noinit;
var eachExploredTree, eachExploredSol: [0..#D] uint = noinit;
var eachBest: [0..#D] int = noinit;
var eachBest: [0..#D] int = noinit;
var eachTaskState: [0..#D] atomic bool = BUSY; // one task per GPU
var allTasksIdleFlag: atomic bool = false;
const poolSize = pool.size;
const poolSize = pool.size;
const c = poolSize / D;
const c = poolSize / D;
const l = poolSize - (D-1)*c;
const l = poolSize - (D-1)*c;
const f = pool.front;
const f = pool.front;
var lock_p: atomic bool;
pool.front = 0;
pool.front = 0;
pool.size = 0;
pool.size = 0;
...
@@ -433,7 +435,7 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -433,7 +435,7 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
generate_children(parents, poolSize, bounds, tree, sol, best_l, pool_loc);
generate_children(parents, poolSize, bounds, tree, sol, best_l, pool_loc);
}
}
else {
else {
// work stealing
// work stealing
attempts
var tries = 0;
var tries = 0;
var steal = false;
var steal = false;
const victims = permute(0..#D);
const victims = permute(0..#D);
...
@@ -457,9 +459,6 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -457,9 +459,6 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
halt("DEADCODE in work stealing");
halt("DEADCODE in work stealing");
}
}
/* for i in 0..#(size/2) {
pool_loc.pushBack(p[i]);
} */
pool_loc.pushBackBulk(p);
pool_loc.pushBackBulk(p);
steal = true;
steal = true;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment