# Copyright 2019 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional

from ConfigSpace.conditions import EqualsCondition
from ConfigSpace.hyperparameters import (
    CategoricalHyperparameter,
    Hyperparameter,
    UniformFloatHyperparameter,
    UniformIntegerHyperparameter,
)
from smac.configspace import ConfigurationSpace

from lale.search.PGO import PGO
from lale.search.search_space import (
    SearchSpace,
    SearchSpaceArray,
    SearchSpaceEnum,
    SearchSpaceNumber,
    should_print_search_space,
)
from lale.search.search_space_grid import SearchSpaceGrid, get_search_space_grids

if TYPE_CHECKING:
    import lale.operators as Ops


def lale_op_smac_tae(op: "Ops.PlannedOperator", f_min):
    # TODO: we could probably do this in a different way, but since we
    # already have these sklearn compatibility wrappers, it is easier
    # for now to use them.
    op_compat = op

    def f(cfg):
        from sklearn.base import clone

        wrapped_op = clone(op_compat)
        cfg2 = smac_fixup_params(cfg)
        trainable = wrapped_op.set_params(**cfg2)
        return f_min(trainable)

    return f
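

# Example usage (a sketch, not part of this module; `planned_pipeline` and
# `my_f_min` are hypothetical, and the Scenario/facade names assume the
# SMAC v0.x API that this module imports from):
#
#   from smac.facade.smac_facade import SMAC
#   from smac.scenario.scenario import Scenario
#
#   tae = lale_op_smac_tae(planned_pipeline, my_f_min)
#   scenario = Scenario({
#       "run_obj": "quality",    # optimize solution quality, not runtime
#       "runcount-limit": 10,    # at most 10 evaluations of the tae
#       "cs": get_smac_space(planned_pipeline),
#       "deterministic": "true",
#   })
#   smac = SMAC(scenario=scenario, tae_runner=tae)
#   incumbent = smac.optimize()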


def lale_trainable_op_from_config(
    op: "Ops.PlannedOperator", cfg
) -> "Ops.TrainableOperator":
    from sklearn.base import clone

    op_compat = op
    wrapped_op = clone(op_compat)
    cfg2 = smac_fixup_params(cfg)
    trainable = wrapped_op.with_params(**cfg2)
    return trainable
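

# Example (a sketch): once SMAC has found an incumbent configuration, it can
# be turned back into a trainable lale operator (`planned_pipeline`,
# `train_X`, and `train_y` are hypothetical):
#
#   trainable = lale_trainable_op_from_config(planned_pipeline, incumbent)
#   trained = trainable.fit(train_X, train_y)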


def get_smac_space(
    op: "Ops.PlannedOperator",
    lale_num_grids: Optional[float] = None,
    lale_pgo: Optional[PGO] = None,
    data_schema: Optional[Dict[str, Any]] = None,
) -> ConfigurationSpace:
    """Top level function: given a lale operator, returns a ConfigurationSpace for use with SMAC.

    Parameters
    ----------
    op : The lale PlannedOperator
    lale_num_grids : integer or float, optional
        If set to an integer >= 1, it determines how many parameter grids
        will be returned (at most).
        If set to a float between 0 and 1, it determines what fraction of
        the grids should be returned.
        Note that setting it to 1 is treated as an integer; to return all
        grids, use None.
    lale_pgo : optional profile-guided optimization data that guides discretization
    data_schema : optional schema for the input data, which is used for
        hyperparameter schema data constraints
    """
    hp_grids = get_search_space_grids(
        op, num_grids=lale_num_grids, pgo=lale_pgo, data_schema=data_schema
    )
    cs = hp_grids_to_smac_cs(hp_grids)
    if should_print_search_space("true", "all", "backend", "smac"):
        name = op.name()
        if not name:
            name = "an operator"
        print(f"SMAC configuration for {name}:\n{str(cs)}")
    return cs
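

# Example (a sketch; `planned_pipeline` is a hypothetical lale pipeline):
#
#   cs = get_smac_space(planned_pipeline, lale_num_grids=5)
#
# cs is then a ConfigurationSpace encoding (at most) 5 of the operator's
# parameter grids, with a top-level "disjunct_discriminant" hyperparameter
# selecting among them (see hp_grids_to_smac_cs below).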


def smac_fixup_params(cfg):
    def strip_key(k: str) -> str:
        # drop the "_<grid index>" suffix that was appended to make keys unique
        return k.rsplit("_", 1)[0]

    def transform_value(v):
        # map the "_lale_none" placeholder back to a real None
        if v == "_lale_none":
            return None
        else:
            return v

    ret = {
        strip_key(k): transform_value(v)
        for (k, v) in cfg.get_dictionary().items()
        if k != "disjunct_discriminant"
    }
    return ret
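

# For example, a sampled configuration whose dictionary is
#   {"disjunct_discriminant": 0, "n_estimators_0": 10, "max_depth_0": "_lale_none"}
# is fixed up to
#   {"n_estimators": 10, "max_depth": None}
# the grid-index suffix and the discriminant key are dropped, and the
# "_lale_none" placeholder (see HPValuetoSMAC below) becomes None again.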


# When sampling from distributions, this is the default number of samples
# to take.  Users can override this by passing num_samples to the
# appropriate function.
SAMPLES_PER_DISTRIBUTION = 2

# We can first convert from our search space IR to a more limited grid
# structure.  This can then be converted to the format required for SMAC.


def SearchSpaceNumberToSMAC(key: str, hp: SearchSpaceNumber) -> Hyperparameter:
    """Converts a SearchSpaceNumber to a SMAC integer or float Hyperparameter."""
    dist = "uniform"
    if hp.distribution:
        dist = hp.distribution
    if hp.maximum is None:
        raise ValueError(
            f"maximum not specified for a number with distribution {dist} for {key}"
        )
    space_max = hp.getInclusiveMax()
    if hp.minimum is None:
        raise ValueError(
            f"minimum not specified for a number with distribution {dist} for {key}"
        )
    space_min = hp.getInclusiveMin()
    log: bool
    if dist in ["uniform", "integer"]:
        log = False
    elif dist == "loguniform":
        log = True
    else:
        raise ValueError(f"unknown/unsupported distribution {dist} for {key}")
    if hp.discrete:
        return UniformIntegerHyperparameter(key, space_min, space_max, log=log)
    else:
        return UniformFloatHyperparameter(key, space_min, space_max, log=log)
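

# For example (a sketch; the exact SearchSpaceNumber constructor arguments
# are an assumption about lale.search.search_space):
#
#   SearchSpaceNumberToSMAC(
#       "learning_rate_0",
#       SearchSpaceNumber(minimum=0.001, maximum=1.0, distribution="loguniform"),
#   )
#   # ~> UniformFloatHyperparameter("learning_rate_0", 0.001, 1.0, log=True)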


def HPValuetoSMAC(key: str, hp: SearchSpace) -> Hyperparameter:
    def val_to_str(v):
        # encode None as a placeholder string so it survives the round trip
        # through SMAC; smac_fixup_params decodes it again
        if v is None:
            return "_lale_none"
        else:
            return v

    if isinstance(hp, SearchSpaceEnum):
        return CategoricalHyperparameter(key, [val_to_str(x) for x in hp.vals])
    elif isinstance(hp, SearchSpaceNumber):
        return SearchSpaceNumberToSMAC(key, hp)
    elif isinstance(hp, SearchSpaceArray):
        raise ValueError(
            f"Arrays are not yet supported by the SMAC backend (key: {key})"
        )
    else:
        raise ValueError(
            f"Unsupported hp description ({type(hp)}) (key: {key}) in the SMAC backend"
        )
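

# For example, SearchSpaceEnum values [None, "auto"] under key "max_features_0"
# become CategoricalHyperparameter("max_features_0", ["_lale_none", "auto"]);
# smac_fixup_params above maps "_lale_none" back to None.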


def SearchSpaceGridtoSMAC(hp: SearchSpaceGrid, disc: int) -> Iterable[Hyperparameter]:
    return (HPValuetoSMAC(f"{k}_{disc}", v) for k, v in hp.items())


disc_str = "disjunct_discriminant"


def addSearchSpaceGrid(
    hp: SearchSpaceGrid, disc: int, parent_disc: Hyperparameter, cs: ConfigurationSpace
) -> None:
    smac = SearchSpaceGridtoSMAC(hp, disc)
    for hyp in smac:
        cs.add_hyperparameter(hyp)
        cs.add_condition(EqualsCondition(child=hyp, parent=parent_disc, value=disc))


def addSearchSpaceGrids(grids: List[SearchSpaceGrid], cs: ConfigurationSpace) -> None:
    parent_disc = CategoricalHyperparameter(disc_str, range(len(grids)))
    cs.add_hyperparameter(parent_disc)
    for i, g in enumerate(grids):
        addSearchSpaceGrid(g, i, parent_disc, cs)


def hp_grids_to_smac_cs(grids: List[SearchSpaceGrid]) -> ConfigurationSpace:
    cs: ConfigurationSpace = ConfigurationSpace()
    addSearchSpaceGrids(grids, cs)
    return cs
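

# Example of the overall encoding (a sketch; the SearchSpace constructor
# calls are assumptions about lale.search.search_space):
#
#   grids: List[SearchSpaceGrid] = [
#       {"solver": SearchSpaceEnum(["lbfgs", "sgd"])},           # grid 0
#       {"alpha": SearchSpaceNumber(minimum=0.0, maximum=1.0)},  # grid 1
#   ]
#   cs = hp_grids_to_smac_cs(grids)
#
# cs then contains "disjunct_discriminant" with choices {0, 1}, plus a
# "solver_0" hyperparameter active only when the discriminant is 0 and an
# "alpha_1" hyperparameter active only when it is 1.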