Skip to content
Snippets Groups Projects
Commit b3fb0c76 authored by Guillaume-Helbecque's avatar Guillaume-Helbecque
Browse files

refactor array allocations

parent 2903c980
Branches
No related tags found
No related merge requests found
...@@ -35,14 +35,14 @@ config const ub: int = 1; // initial upper bound ...@@ -35,14 +35,14 @@ config const ub: int = 1; // initial upper bound
const jobs = taillard_get_nb_jobs(inst); const jobs = taillard_get_nb_jobs(inst);
const machines = taillard_get_nb_machines(inst); const machines = taillard_get_nb_machines(inst);
var lbound1 = new WrapperLB1(jobs, machines); //lb1_bound_data(jobs, machines); var lbound1 = new lb1_bound_data(jobs, machines);
taillard_get_processing_times(lbound1!.lb1_bound.p_times, inst); taillard_get_processing_times(lbound1.p_times, inst);
fill_min_heads_tails(lbound1!.lb1_bound); fill_min_heads_tails(lbound1);
var lbound2 = new WrapperLB2(jobs, machines); var lbound2 = new lb2_bound_data(jobs, machines);
fill_machine_pairs(lbound2!.lb2_bound/*, LB2_FULL*/); fill_machine_pairs(lbound2/*, LB2_FULL*/);
fill_lags(lbound1!.lb1_bound.p_times, lbound2!.lb2_bound); fill_lags(lbound1.p_times, lbound2);
fill_johnson_schedules(lbound1!.lb1_bound.p_times, lbound2!.lb2_bound); fill_johnson_schedules(lbound1.p_times, lbound2);
const initUB = if (ub == 1) then taillard_get_best_ub(inst) else max(int); const initUB = if (ub == 1) then taillard_get_best_ub(inst) else max(int);
...@@ -105,7 +105,7 @@ proc decompose_lb1(const parent: Node, ref tree_loc: uint, ref num_sol: uint, ...@@ -105,7 +105,7 @@ proc decompose_lb1(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
child.prmu = parent.prmu; child.prmu = parent.prmu;
child.prmu[parent.depth] <=> child.prmu[i]; child.prmu[parent.depth] <=> child.prmu[i];
var lowerbound = lb1_bound(lbound1!.lb1_bound, child.prmu, child.limit1, jobs); var lowerbound = lb1_bound(lbound1, child.prmu, child.limit1, jobs);
if (child.depth == jobs) { // if child leaf if (child.depth == jobs) { // if child leaf
num_sol += 1; num_sol += 1;
...@@ -127,7 +127,7 @@ proc decompose_lb1_d(const parent: Node, ref tree_loc: uint, ref num_sol: uint, ...@@ -127,7 +127,7 @@ proc decompose_lb1_d(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
{ {
var lb_begin: MAX_JOBS*int(32); var lb_begin: MAX_JOBS*int(32);
lb1_children_bounds(lbound1!.lb1_bound, parent.prmu, parent.limit1, jobs, lb_begin); lb1_children_bounds(lbound1, parent.prmu, parent.limit1, jobs, lb_begin);
for i in parent.limit1+1..(jobs-1) { for i in parent.limit1+1..(jobs-1) {
const job = parent.prmu[i]; const job = parent.prmu[i];
...@@ -164,7 +164,7 @@ proc decompose_lb2(const parent: Node, ref tree_loc: uint, ref num_sol: uint, ...@@ -164,7 +164,7 @@ proc decompose_lb2(const parent: Node, ref tree_loc: uint, ref num_sol: uint,
child.prmu = parent.prmu; child.prmu = parent.prmu;
child.prmu[parent.depth] <=> child.prmu[i]; child.prmu[parent.depth] <=> child.prmu[i];
var lowerbound = lb2_bound(lbound1!.lb1_bound, lbound2!.lb2_bound, child.prmu, child.limit1, jobs, best); var lowerbound = lb2_bound(lbound1, lbound2, child.prmu, child.limit1, jobs, best);
if (child.depth == jobs) { // if child leaf if (child.depth == jobs) { // if child leaf
num_sol += 1; num_sol += 1;
...@@ -211,7 +211,7 @@ proc evaluate_gpu_lb1(const parents_d: [] Node, const size, const lbound1_d, ref ...@@ -211,7 +211,7 @@ proc evaluate_gpu_lb1(const parents_d: [] Node, const size, const lbound1_d, ref
if (k >= parent.limit1+1) { if (k >= parent.limit1+1) {
prmu[depth] <=> prmu[k]; prmu[depth] <=> prmu[k];
bounds_d[threadId] = lb1_bound(lbound1_d!.lb1_bound, prmu, parent.limit1+1, jobs); bounds_d[threadId] = lb1_bound(lbound1_d, prmu, parent.limit1+1, jobs);
prmu[depth] <=> prmu[k]; prmu[depth] <=> prmu[k];
} }
} }
...@@ -233,7 +233,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const ...@@ -233,7 +233,7 @@ proc evaluate_gpu_lb1_d(const parents_d: [] Node, const size, const best, const
var lb_begin: MAX_JOBS*int(32); var lb_begin: MAX_JOBS*int(32);
lb1_children_bounds(lbound1_d!.lb1_bound, parent.prmu, parent.limit1, jobs, lb_begin); lb1_children_bounds(lbound1_d, parent.prmu, parent.limit1, jobs, lb_begin);
for k in 0..#jobs { for k in 0..#jobs {
if (k >= parent.limit1+1) { if (k >= parent.limit1+1) {
...@@ -257,7 +257,7 @@ proc evaluate_gpu_lb2(const parents_d: [] Node, const size, const best, const lb ...@@ -257,7 +257,7 @@ proc evaluate_gpu_lb2(const parents_d: [] Node, const size, const best, const lb
if (k >= parent.limit1+1) { if (k >= parent.limit1+1) {
prmu[depth] <=> prmu[k]; prmu[depth] <=> prmu[k];
bounds_d[threadId] = lb2_bound(lbound1_d!.lb1_bound, lbound2_d!.lb2_bound, prmu, parent.limit1+1, jobs, best); bounds_d[threadId] = lb2_bound(lbound1_d, lbound2_d, prmu, parent.limit1+1, jobs, best);
prmu[depth] <=> prmu[k]; prmu[depth] <=> prmu[k];
} }
} }
...@@ -355,26 +355,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint ...@@ -355,26 +355,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
var parents: [0..#M] Node = noinit; var parents: [0..#M] Node = noinit;
var bounds: [0..#(M*jobs)] int(32) = noinit; var bounds: [0..#(M*jobs)] int(32) = noinit;
var lbound1_d: lbound1.type; on device var parents_d: [0..#M] Node;
var lbound2_d: lbound2.type; on device var bounds_d: [0..#(M*jobs)] int(32);
var parents_d: WrapperArrayParents;
var bounds_d: WrapperArrayBounds;
on device { on device var lbound1_d = new lb1_bound_data(jobs, machines);
lbound1_d = new WrapperLB1(jobs, machines); lbound1_d.p_times = lbound1.p_times;
lbound1_d!.lb1_bound.p_times = lbound1!.lb1_bound.p_times; lbound1_d.min_heads = lbound1.min_heads;
lbound1_d!.lb1_bound.min_heads = lbound1!.lb1_bound.min_heads; lbound1_d.min_tails = lbound1.min_tails;
lbound1_d!.lb1_bound.min_tails = lbound1!.lb1_bound.min_tails;
lbound2_d = new WrapperLB2(jobs, machines); on device var lbound2_d = new lb2_bound_data(jobs, machines);
lbound2_d!.lb2_bound.johnson_schedules = lbound2!.lb2_bound.johnson_schedules; lbound2_d.johnson_schedules = lbound2.johnson_schedules;
lbound2_d!.lb2_bound.lags = lbound2!.lb2_bound.lags; lbound2_d.lags = lbound2.lags;
lbound2_d!.lb2_bound.machine_pairs = lbound2!.lb2_bound.machine_pairs; lbound2_d.machine_pairs = lbound2.machine_pairs;
lbound2_d!.lb2_bound.machine_pair_order = lbound2!.lb2_bound.machine_pair_order; lbound2_d.machine_pair_order = lbound2.machine_pair_order;
parents_d = new WrapperArrayParents();
bounds_d = new WrapperArrayBounds();
}
while true { while true {
...@@ -394,11 +387,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint ...@@ -394,11 +387,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
*/ */
const numBounds = jobs * poolSize; const numBounds = jobs * poolSize;
on device { parents_d = parents; // host-to-device
parents_d!.arr = parents; // host-to-device on device do evaluate_gpu(parents_d, numBounds, best, lbound1_d, lbound2_d, bounds_d);
evaluate_gpu(parents_d!.arr, numBounds, best, lbound1_d, lbound2_d, bounds_d!.arr); bounds = bounds_d; // device-to-host
bounds = bounds_d!.arr; // device-to-host
}
/* /*
Each task generates and inserts its children nodes to the pool. Each task generates and inserts its children nodes to the pool.
......
...@@ -261,13 +261,13 @@ proc evaluate_gpu(const parents_d: [] Node, const size, const best, const lbound ...@@ -261,13 +261,13 @@ proc evaluate_gpu(const parents_d: [] Node, const size, const best, const lbound
{ {
select lb { select lb {
when "lb1_d" { when "lb1_d" {
evaluate_gpu_lb1_d(parents_d, size, best, lbound1_d!.lb1_bound, bounds_d); evaluate_gpu_lb1_d(parents_d, size, best, lbound1_d, bounds_d);
} }
when "lb1" { when "lb1" {
evaluate_gpu_lb1(parents_d, size, lbound1_d!.lb1_bound, bounds_d); evaluate_gpu_lb1(parents_d, size, lbound1_d, bounds_d);
} }
otherwise { // lb2 otherwise { // lb2
evaluate_gpu_lb2(parents_d, size, best, lbound1_d!.lb1_bound, lbound2_d!.lb2_bound, bounds_d); evaluate_gpu_lb2(parents_d, size, best, lbound1_d, lbound2_d, bounds_d);
} }
} }
} }
...@@ -391,26 +391,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint ...@@ -391,26 +391,19 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
var parents: [0..#M] Node = noinit; var parents: [0..#M] Node = noinit;
var bounds: [0..#(M*jobs)] int(32) = noinit; var bounds: [0..#(M*jobs)] int(32) = noinit;
var lbound1_d: WrapperLB1; on device var parents_d: [0..#M] Node;
var lbound2_d: WrapperLB2; on device var bounds_d: [0..#(M*jobs)] int(32);
var parents_d: WrapperArrayParents;
var bounds_d: WrapperArrayBounds;
on device { on device var lbound1_d = new lb1_bound_data(jobs, machines);
lbound1_d = new WrapperLB1(jobs, machines); lbound1_d.p_times = lbound1.p_times;
lbound1_d!.lb1_bound.p_times = lbound1.p_times; lbound1_d.min_heads = lbound1.min_heads;
lbound1_d!.lb1_bound.min_heads = lbound1.min_heads; lbound1_d.min_tails = lbound1.min_tails;
lbound1_d!.lb1_bound.min_tails = lbound1.min_tails;
lbound2_d = new WrapperLB2(jobs, machines); on device var lbound2_d = new lb2_bound_data(jobs, machines);
lbound2_d!.lb2_bound.johnson_schedules = lbound2.johnson_schedules; lbound2_d.johnson_schedules = lbound2.johnson_schedules;
lbound2_d!.lb2_bound.lags = lbound2.lags; lbound2_d.lags = lbound2.lags;
lbound2_d!.lb2_bound.machine_pairs = lbound2.machine_pairs; lbound2_d.machine_pairs = lbound2.machine_pairs;
lbound2_d!.lb2_bound.machine_pair_order = lbound2.machine_pair_order; lbound2_d.machine_pair_order = lbound2.machine_pair_order;
parents_d = new WrapperArrayParents();
bounds_d = new WrapperArrayBounds();
}
while true { while true {
/* /*
...@@ -440,11 +433,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint ...@@ -440,11 +433,9 @@ proc pfsp_search(ref optimum: int, ref exploredTree: uint, ref exploredSol: uint
*/ */
const numBounds = jobs * poolSize; const numBounds = jobs * poolSize;
on device { parents_d = parents; // host-to-device
parents_d!.arr = parents; // host-to-device on device do evaluate_gpu(parents_d, numBounds, best_l, lbound1_d, lbound2_d, bounds_d);
evaluate_gpu(parents_d!.arr, numBounds, best_l, lbound1_d, lbound2_d, bounds_d!.arr); bounds = bounds_d; // device-to-host
bounds = bounds_d!.arr; // device-to-host
}
/* /*
Each task generates and inserts its children nodes to the pool. Each task generates and inserts its children nodes to the pool.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment