Description
When using future with a multisession plan, a graph learner containing colapply raises an error during tuning. This never happened before, and the error does not occur with a sequential plan.
Error message
> # does not work
> future::plan('multisession', workers = 4)
> tuner_xgbcox$optimize(instance_xgbcox)
INFO [21:50:00.682] [bbotk] Starting to optimize 9 parameter(s) with '<OptimizerBatchHyperband>' and '<TerminatorEvals> [n_evals=25, k=0]'
INFO [21:50:00.714] [bbotk] Evaluating 4 configuration(s)
INFO [21:50:00.722] [mlr3] Running benchmark with 12 resampling iterations
INFO [21:50:01.575] [mlr3] Applying learner 'removeconstants.colapply.surv.xgboost.cox' on task 'lung' (iter 1/3)
Error in .__Task__col_roles(self = self, private = private, super = super, :
Assertion on 'names of col_roles' failed: Names must be a permutation of set {'feature','target','name','order','stratum','group','weight'}, but has extra elements {'always_included'}.
This happened PipeOp colapply's $train()
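My guess (an assumption on my side, not verified): mlr3proba registers the extra 'always_included' column role in mlr3::mlr_reflections$task_col_roles when it is loaded, and the multisession workers may check the task against the base mlr3 set of roles before mlr3proba is loaded there. A minimal sketch to inspect the workers, using future.apply (not part of the reproduction script below):

future::plan('multisession', workers = 4)
# ask each worker which column roles its mlr3 session knows for survival tasks;
# NULL, or a set without 'always_included', would match the assertion failure above
roles = future.apply::future_lapply(seq_len(4), function(i) {
  mlr3::mlr_reflections$task_col_roles$surv
})
print(roles)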
OS
Windows, WSL2 (Ubuntu 22.04)
Version
mlr3verse updated to the newest versions:
> mlr3verse_info()
Key: <package>
             package version
              <char>  <char>
 1:            bbotk   1.5.0
 2:      miesmuschel    <NA>
 3:             mlr3  0.22.1
 4:    mlr3batchmark    <NA>
 5:    mlr3benchmark    <NA>
 6:      mlr3cluster  0.1.10
 7:         mlr3data   0.9.0
 8:           mlr3db    <NA>
 9:     mlr3fairness    <NA>
10:          mlr3fda    <NA>
11:      mlr3filters   0.8.1
12:      mlr3fselect   1.3.0
13:    mlr3hyperband   0.6.0
14:       mlr3inferr   0.1.0
15:     mlr3learners   0.9.0
16:          mlr3mbo   0.2.8
17:         mlr3misc  0.16.0
18:          mlr3oml    <NA>
19:    mlr3pipelines   0.7.1
20:        mlr3proba   0.7.4
21:      mlr3spatial    <NA>
22: mlr3spatiotempcv    <NA>
23:      mlr3summary    <NA>
24:        mlr3torch    <NA>
25:       mlr3tuning   1.3.0
26: mlr3tuningspaces   0.5.2
27:          mlr3viz  0.10.1
28:          paradox   1.0.1
29:             rush    <NA>
             package version
How to reproduce
library(dplyr)
library(survival)
library(mlr3verse)
library(mlr3proba)
# build a survival task from the lung data without the sex and ph.ecog columns
lung_filter = lung |> select(-sex, -ph.ecog)
task = TaskSurv$new(id = "lung", backend = lung_filter, time = "time", event = "status")
learner_xgbcox = lrn('surv.xgboost.cox')
learner_xgbcox$param_set$set_values(
tree_method = 'hist',
device = 'cuda',
booster = 'gbtree',
nrounds = to_tune(p_int(128, 512, tags = 'budget')),
eta = to_tune(1e-4, 1, logscale = TRUE),
gamma = to_tune(1e-5, 7, logscale = TRUE),
max_depth = to_tune(1, 20),
colsample_bytree = to_tune(1e-2, 1),
colsample_bylevel = to_tune(1e-2, 1),
lambda = to_tune(1e-3, 1e3, logscale = TRUE),
alpha = to_tune(1e-3, 1e3, logscale = TRUE),
subsample = to_tune(1e-1, 1)
)
# preprocessing: drop constant features, then convert factor columns to integer
prep_xgbcox = po('removeconstants') %>>%
  po('colapply', applicator = as.integer, affect_columns = selector_type('factor'))
glearner_xgbcox = prep_xgbcox %>>% learner_xgbcox
tuner_xgbcox = tnr('hyperband', eta = 2, repetitions = 1)
instance_xgbcox = ti(
task = task,
learner = glearner_xgbcox,
resampling = rsmp('cv', folds = 3),
measures = msr('surv.cindex'),
terminator = trm('evals', n_evals = 25)
)
# does not work: fails with the col_roles assertion shown above
future::plan('multisession', workers = 4)
tuner_xgbcox$optimize(instance_xgbcox)
# works
future::plan('sequential')
tuner_xgbcox$optimize(instance_xgbcox)
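A possible interim workaround, assuming the cause guessed above: pre-load mlr3proba on the workers before optimizing. Multisession workers are persistent R sessions, so a one-time load should stick. This is only a sketch; with default scheduling, future_lapply creates one chunk per worker, but it is not strictly guaranteed that every worker receives one:

library(future.apply)
future::plan('multisession', workers = 4)
# try to load mlr3proba once on each worker so its col roles get registered there
invisible(future_lapply(
  seq_len(future::nbrOfWorkers()),
  function(i) requireNamespace('mlr3proba')
))
tuner_xgbcox$optimize(instance_xgbcox)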