Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
G
GPU Accelerated Tree Search Chapel
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
UltraBO
GPU Accelerated Tree Search Chapel
Commits
b3fb0c76
Commit
b3fb0c76
authored
4 months ago
by
Guillaume-Helbecque
Browse files
Options
Downloads
Patches
Plain Diff
refactor array allocations
parent
2903c980
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
pfsp_gpu_chpl.chpl
+29
-38
29 additions, 38 deletions
pfsp_gpu_chpl.chpl
pfsp_multigpu_chpl.chpl
+19
-28
19 additions, 28 deletions
pfsp_multigpu_chpl.chpl
with
48 additions
and
66 deletions
pfsp_gpu_chpl.chpl
+
29
−
38
View file @
b3fb0c76
...
@@ -35,14 +35,14 @@ config const ub: int = 1; // initial upper bound
...
@@ -35,14 +35,14 @@ config const ub: int = 1; // initial upper bound
const jobs = taillard_get_nb_jobs(inst);
const jobs = taillard_get_nb_jobs(inst);
const machines = taillard_get_nb_machines(inst);
const machines = taillard_get_nb_machines(inst);
var lbound1 = new
WrapperLB1(jobs, machines); //
lb1_bound_data(jobs, machines);
var lbound1 = new lb1_bound_data(jobs, machines);
taillard_get_processing_times(lbound1
!.lb1_bound
.p_times, inst);
taillard_get_processing_times(lbound1.p_times, inst);
fill_min_heads_tails(lbound1
!.lb1_bound
);
fill_min_heads_tails(lbound1);
var lbound2 = new
WrapperLB2
(jobs, machines);
var lbound2 = new
lb2_bound_data
(jobs, machines);
fill_machine_pairs(lbound2
!.lb2_bound
/*, LB2_FULL*/);
fill_machine_pairs(lbound2/*, LB2_FULL*/);
fill_lags(lbound1
!.lb1_bound
.p_times, lbound2
!.lb2_bound
);
fill_lags(lbound1.p_times, lbound2);
fill_johnson_schedules(lbound1
!.lb1_bound
.p_times, lbound2
!.lb2_bound
);
fill_johnson_schedules(lbound1.p_times, lbound2);
const initUB = if (ub == 1) then taillard_get_best_ub(inst) else max(int);
const initUB = if (ub == 1) then taillard_get_best_ub(inst) else max(int);
...
@@ -105,7 +105,7 @@ proc decompose_lb1(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
...
@@ -105,7 +105,7 @@ proc decompose_lb1(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
child.prmu = parent.prmu;
child.prmu = parent.prmu;
child.prmu[parent.depth] <=> child.prmu[i];
child.prmu[parent.depth] <=> child.prmu[i];
var lowerbound = lb1_bound(lbound1
!.lb1_bound
, child.prmu, child.limit1, jobs);
var lowerbound = lb1_bound(lbound1, child.prmu, child.limit1, jobs);
if (child.depth == jobs) { // if child leaf
if (child.depth == jobs) { // if child leaf
num_sol += 1;
num_sol += 1;
...
@@ -127,7 +127,7 @@ proc decompose_lb1_d(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
...
@@ -127,7 +127,7 @@ proc decompose_lb1_d(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
{
{
var lb_begin: MAX_JOBS*int(32);
var lb_begin: MAX_JOBS*int(32);
lb1_children_bounds(lbound1
!.lb1_bound
, parent.prmu, parent.limit1, jobs, lb_begin);
lb1_children_bounds(lbound1, parent.prmu, parent.limit1, jobs, lb_begin);
for i in parent.limit1+1..(jobs-1) {
for i in parent.limit1+1..(jobs-1) {
const job = parent.prmu[i];
const job = parent.prmu[i];
...
@@ -164,7 +164,7 @@ proc decompose_lb2(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
...
@@ -164,7 +164,7 @@ proc decompose_lb2(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
child.prmu = parent.prmu;
child.prmu = parent.prmu;
child.prmu[parent.depth] <=> child.prmu[i];
child.prmu[parent.depth] <=> child.prmu[i];
var lowerbound = lb2_bound(lbound1
!.lb1_bound, lbound2!.lb2_bound
, child.prmu, child.limit1, jobs, best);
var lowerbound = lb2_bound(lbound1
, lbound2
, child.prmu, child.limit1, jobs, best);
if (child.depth == jobs) { // if child leaf
if (child.depth == jobs) { // if child leaf
num_sol += 1;
num_sol += 1;
...
@@ -211,7 +211,7 @@ proc evaluate_gpu_lb1(const parents_d: [] Node, const size, const lbound1_d, ref
...
@@ -211,7 +211,7 @@ proc evaluate_gpu_lb1(const parents_d: [] Node, const size, const lbound1_d, ref
if (k >= parent.limit1+1) {
if (k >= parent.limit1+1) {
prmu[depth] <=> prmu[k];
prmu[depth] <=> prmu[k];
bounds_d[threadId] = lb1_bound(lbound1_d
!.lb1_bound
, prmu, parent.limit1+1, jobs);
bounds_d[threadId] = lb1_bound(lbound1_d, prmu, parent.limit1+1, jobs);
prmu[depth] <=> prmu[k];
prmu[depth] <=> prmu[k];
}
}
}
}
...
@@ -233,7 +233,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const
...
@@ -233,7 +233,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const
var lb_begin: MAX_JOBS*int(32);
var lb_begin: MAX_JOBS*int(32);
lb1_children_bounds(lbound1_d
!.lb1_bound
, parent.prmu, parent.limit1, jobs, lb_begin);
lb1_children_bounds(lbound1_d, parent.prmu, parent.limit1, jobs, lb_begin);
for k in 0..#jobs {
for k in 0..#jobs {
if (k >= parent.limit1+1) {
if (k >= parent.limit1+1) {
...
@@ -257,7 +257,7 @@ proc evaluate_gpu_lb2(const parents_d: [] Node, const size, const best, const lb
...
@@ -257,7 +257,7 @@ proc evaluate_gpu_lb2(const parents_d: [] Node, const size, const best, const lb
if (k >= parent.limit1+1) {
if (k >= parent.limit1+1) {
prmu[depth] <=> prmu[k];
prmu[depth] <=> prmu[k];
bounds_d[threadId] = lb2_bound(lbound1_d
!.lb1_bound, lbound2_d!.lb2_boun
d, prmu, parent.limit1+1, jobs, best);
bounds_d[threadId] = lb2_bound(lbound1_d
, lbound2_
d, prmu, parent.limit1+1, jobs, best);
prmu[depth] <=> prmu[k];
prmu[depth] <=> prmu[k];
}
}
}
}
...
@@ -355,26 +355,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -355,26 +355,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
var parents: [0..#M] Node = noinit;
var parents: [0..#M] Node = noinit;
var bounds: [0..#(M*jobs)] int(32) = noinit;
var bounds: [0..#(M*jobs)] int(32) = noinit;
var lbound1_d: lbound1.type;
on device var parents_d: [0..#M] Node;
var lbound2_d: lbound2.type;
on device var bounds_d: [0..#(M*jobs)] int(32);
var parents_d: WrapperArrayParents;
var bounds_d: WrapperArrayBounds;
on device {
on device var lbound1_d = new lb1_bound_data(jobs, machines);
lbound1_d = new WrapperLB1(jobs, machines);
lbound1_d.p_times = lbound1.p_times;
lbound1_d!.lb1_bound.p_times = lbound1!.lb1_bound.p_times;
lbound1_d.min_heads = lbound1.min_heads;
lbound1_d!.lb1_bound.min_heads = lbound1!.lb1_bound.min_heads;
lbound1_d.min_tails = lbound1.min_tails;
lbound1_d!.lb1_bound.min_tails = lbound1!.lb1_bound.min_tails;
lbound2_d = new WrapperLB2(jobs, machines);
on device var lbound2_d = new lb2_bound_data(jobs, machines);
lbound2_d!.lb2_bound.johnson_schedules = lbound2!.lb2_bound.johnson_schedules;
lbound2_d.johnson_schedules = lbound2.johnson_schedules;
lbound2_d!.lb2_bound.lags = lbound2!.lb2_bound.lags;
lbound2_d.lags = lbound2.lags;
lbound2_d!.lb2_bound.machine_pairs = lbound2!.lb2_bound.machine_pairs;
lbound2_d.machine_pairs = lbound2.machine_pairs;
lbound2_d!.lb2_bound.machine_pair_order = lbound2!.lb2_bound.machine_pair_order;
lbound2_d.machine_pair_order = lbound2.machine_pair_order;
parents_d = new WrapperArrayParents();
bounds_d = new WrapperArrayBounds();
}
while true {
while true {
...
@@ -394,11 +387,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -394,11 +387,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
*/
*/
const numBounds = jobs * poolSize;
const numBounds = jobs * poolSize;
on device {
parents_d = parents; // host-to-device
parents_d!.arr = parents; // host-to-device
on device do evaluate_gpu(parents_d, numBounds, best, lbound1_d, lbound2_d, bounds_d);
evaluate_gpu(parents_d!.arr, numBounds, best, lbound1_d, lbound2_d, bounds_d!.arr);
bounds = bounds_d; // device-to-host
bounds = bounds_d!.arr; // device-to-host
}
/*
/*
Each task generates and inserts its children nodes to the pool.
Each task generates and inserts its children nodes to the pool.
...
...
This diff is collapsed.
Click to expand it.
pfsp_multigpu_chpl.chpl
+
19
−
28
View file @
b3fb0c76
...
@@ -261,13 +261,13 @@ proc evaluate_gpu(const parents_d: [] Node, const size, const best, const lbound
...
@@ -261,13 +261,13 @@ proc evaluate_gpu(const parents_d: [] Node, const size, const best, const lbound
{
{
select lb {
select lb {
when "lb1_d" {
when "lb1_d" {
evaluate_gpu_lb1_d(parents_d, size, best, lbound1_d
!.lb1_bound
, bounds_d);
evaluate_gpu_lb1_d(parents_d, size, best, lbound1_d, bounds_d);
}
}
when "lb1" {
when "lb1" {
evaluate_gpu_lb1(parents_d, size, lbound1_d
!.lb1_bound
, bounds_d);
evaluate_gpu_lb1(parents_d, size, lbound1_d, bounds_d);
}
}
otherwise { // lb2
otherwise { // lb2
evaluate_gpu_lb2(parents_d, size, best, lbound1_d
!.lb1_bound, lbound2_d!.lb2_boun
d, bounds_d);
evaluate_gpu_lb2(parents_d, size, best, lbound1_d
, lbound2_
d, bounds_d);
}
}
}
}
}
}
...
@@ -391,26 +391,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -391,26 +391,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
var parents: [0..#M] Node = noinit;
var parents: [0..#M] Node = noinit;
var bounds: [0..#(M*jobs)] int(32) = noinit;
var bounds: [0..#(M*jobs)] int(32) = noinit;
var lbound1_d: WrapperLB1;
on device var parents_d: [0..#M] Node;
var lbound2_d: WrapperLB2;
on device var bounds_d: [0..#(M*jobs)] int(32);
var parents_d: WrapperArrayParents;
var bounds_d: WrapperArrayBounds;
on device {
on device var lbound1_d = new lb1_bound_data(jobs, machines);
lbound1_d = new WrapperLB1(jobs, machines);
lbound1_d.p_times = lbound1.p_times;
lbound1_d!.lb1_bound.p_times = lbound1.p_times;
lbound1_d.min_heads = lbound1.min_heads;
lbound1_d!.lb1_bound.min_heads = lbound1.min_heads;
lbound1_d.min_tails = lbound1.min_tails;
lbound1_d!.lb1_bound.min_tails = lbound1.min_tails;
lbound2_d = new WrapperLB2(jobs, machines);
on device var lbound2_d = new lb2_bound_data(jobs, machines);
lbound2_d!.lb2_bound.johnson_schedules = lbound2.johnson_schedules;
lbound2_d.johnson_schedules = lbound2.johnson_schedules;
lbound2_d!.lb2_bound.lags = lbound2.lags;
lbound2_d.lags = lbound2.lags;
lbound2_d!.lb2_bound.machine_pairs = lbound2.machine_pairs;
lbound2_d.machine_pairs = lbound2.machine_pairs;
lbound2_d!.lb2_bound.machine_pair_order = lbound2.machine_pair_order;
lbound2_d.machine_pair_order = lbound2.machine_pair_order;
parents_d = new WrapperArrayParents();
bounds_d = new WrapperArrayBounds();
}
while true {
while true {
/*
/*
...
@@ -440,11 +433,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
...
@@ -440,11 +433,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
*/
*/
const numBounds = jobs * poolSize;
const numBounds = jobs * poolSize;
on device {
parents_d = parents; // host-to-device
parents_d!.arr = parents; // host-to-device
on device do evaluate_gpu(parents_d, numBounds, best_l, lbound1_d, lbound2_d, bounds_d);
evaluate_gpu(parents_d!.arr, numBounds, best_l, lbound1_d, lbound2_d, bounds_d!.arr);
bounds = bounds_d; // device-to-host
bounds = bounds_d!.arr; // device-to-host
}
/*
/*
Each task generates and inserts its children nodes to the pool.
Each task generates and inserts its children nodes to the pool.
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment