ai2_kit.domain.selector module

ai2_kit.domain.selector module#

class ai2_kit.domain.selector.CllModelDevSelectorContext(path_prefix: str, resource_manager: ai2_kit.core.resource_manager.ResourceManager)[source]#

Bases: BaseCllContext

path_prefix: str#

resource_manager: ResourceManager#

class ai2_kit.domain.selector.CllModelDeviSelectorInput(config: ai2_kit.domain.selector.CllModelDeviSelectorInputConfig, model_devi_data: List[ai2_kit.core.artifact.Artifact], model_devi_file: str, type_map: List[str])[source]#

Bases: object

config: CllModelDeviSelectorInputConfig#

model_devi_data: List[Artifact]#

model_devi_file: str#

set_model_devi_dataset(data: List[Artifact])[source]#

type_map: List[str]#

class ai2_kit.domain.selector.CllModelDeviSelectorInputConfig(*, f_trust_lo: float = 0.0, f_trust_hi: float = 65535.0, new_explore_system_q: float = 0.25, asap_options: Optional[AsapOptions] = None, screening_fn: Optional[str] = None, max_decent_per_traj: int = -1, workers: int = 4)[source]#

Bases: BaseModel

class AsapOptions(*, disable: bool = False, limit_per_cluster: int = 1, sort_by_ssw_energy: bool = False, descriptor: dict = {'soap': {'crossover': False, 'element_wise': False, 'l_max': 6, 'n_max': 6, 'preset': 'minimal', 'r_cut': 3.5, 'rbf': 'gto', 'reducer_type': 'average', 'sigma': 0.5, 'zeta': 1}}, dim_reducer: dict = {'pca': {'parameter': {'n_components': 3, 'scalecenter': True}, 'type': 'PCA'}}, cluster: dict = {'dbscan': {}})[source]#

Bases: BaseModel

cluster: dict#

descriptor: dict#

dim_reducer: dict#

disable: bool#

limit_per_cluster: int#: limit the number of structures to be selected from the same cluster

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#: Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'cluster': FieldInfo(annotation=dict, required=False, default={'dbscan': {}}), 'descriptor': FieldInfo(annotation=dict, required=False, default={'soap': {'preset': 'minimal', 'r_cut': 3.5, 'n_max': 6, 'l_max': 6, 'sigma': 0.5, 'crossover': False, 'rbf': 'gto', 'reducer_type': 'average', 'element_wise': False, 'zeta': 1}}), 'dim_reducer': FieldInfo(annotation=dict, required=False, default={'pca': {'type': 'PCA', 'parameter': {'n_components': 3, 'scalecenter': True}}}), 'disable': FieldInfo(annotation=bool, required=False, default=False), 'limit_per_cluster': FieldInfo(annotation=int, required=False, default=1), 'sort_by_ssw_energy': FieldInfo(annotation=bool, required=False, default=False)}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

sort_by_ssw_energy: bool#: sort the structures by ssw_energy within each cluster

asap_options: Optional[AsapOptions]#: options for ASAP to further select candidates

f_trust_hi: float#: the upper bound of model_devi score to select the structure for labeling

f_trust_lo: float#: the lower bound of model_devi score to select the structure for labeling

max_decent_per_traj: int#: limit the max number of decent structures per trajectory, -1 means unlimited

model_computed_fields: ClassVar[dict[str, ComputedFieldInfo]] = {}#: A dictionary of computed field names and their corresponding ComputedFieldInfo objects.

model_config: ClassVar[ConfigDict] = {'extra': 'forbid'}#: Configuration for the model, should be a dictionary conforming to [ConfigDict][pydantic.config.ConfigDict].

model_fields: ClassVar[dict[str, FieldInfo]] = {'asap_options': FieldInfo(annotation=Union[CllModelDeviSelectorInputConfig.AsapOptions, NoneType], required=False, default=None), 'f_trust_hi': FieldInfo(annotation=float, required=False, default=65535.0), 'f_trust_lo': FieldInfo(annotation=float, required=False, default=0.0), 'max_decent_per_traj': FieldInfo(annotation=int, required=False, default=-1), 'new_explore_system_q': FieldInfo(annotation=float, required=False, default=0.25), 'screening_fn': FieldInfo(annotation=Union[str, NoneType], required=False, default=None), 'workers': FieldInfo(annotation=int, required=False, default=4)}#

Metadata about the fields defined on the model, mapping of field names to [FieldInfo][pydantic.fields.FieldInfo].

This replaces Model.__fields__ from Pydantic V1.

new_explore_system_q: float#: the quantile of model_devi score to select the structure for next round of exploration

screening_fn: Optional[str]#: the function to screen the candidates, e.g. "lambda x: x['ssw_energy'] < -1000"

workers: int#: number of workers to run the analysis

class ai2_kit.domain.selector.CllModelDeviSelectorOutput(candidates: List[ai2_kit.core.artifact.Artifact], passing_rate: float, new_explore_systems: List[ai2_kit.core.artifact.Artifact])[source]#

Bases: ICllSelectorOutput

candidates: List[Artifact]#

get_model_devi_dataset()[source]#

get_new_explore_systems() → List[Artifact][source]#

get_passing_rate() → float[source]#

new_explore_systems: List[Artifact]#

passing_rate: float#

ai2_kit.domain.selector.bulk_select_distinct_structures(candidates: List[ArtifactDict], descriptor_opt: dict, dim_reducer_opt: dict, cluster_opt: dict, type_map: List[str], work_dir: str, limit_per_cluster: int = -1, sort_by_energy: bool = False, workers: int = 4) → List[ArtifactDict][source]#

ai2_kit.domain.selector.bulk_select_structures_by_model_devi(model_devi_outputs: List[ArtifactDict], model_devi_file: str, f_trust_lo: float, f_trust_hi: float, new_explore_system_q: float, type_map: List[str], work_dir: str, max_decent_per_traj: int, screening_fn: Optional[str], workers: int = 4) → List[Tuple[Dict[str, ArtifactDict], dict]][source]#

async ai2_kit.domain.selector.cll_model_devi_selector(input: CllModelDeviSelectorInput, ctx: CllModelDevSelectorContext)[source]#

ai2_kit.domain.selector.select_distinct_structures(candidates: List[ArtifactDict], attrs: dict, descriptor_opt: dict, dim_reducer_opt: dict, cluster_opt: dict, type_map: List[str], work_dir: str, limit_per_cluster: int = -1, sort_by_energy: bool = False)[source]#

ai2_kit.domain.selector.select_structures_by_model_devi(model_devi_output: ArtifactDict, model_devi_file: str, f_trust_lo: float, f_trust_hi: float, type_map: List[str], work_dir: str, new_explore_system_q: float, max_decent_per_traj: int, screening_fn: Optional[str]) → Tuple[Dict[str, ArtifactDict], dict][source]#

Analyze the model_devi output of the explore stage and select candidates.

Parameters: new_explore_system_q – the quantile of model_devi score to select the structure for next round of exploration