Source code for ai2_kit.algorithm.reweighting

from typing import Optional, Dict
from collections import namedtuple

from scipy.stats import gaussian_kde
import numpy as np
import json

from ai2_kit.core.util import expand_globs, ensure_dir
from ai2_kit.core.log import get_logger
from ai2_kit.lib import plumed


logger = get_logger(__name__)


default_kB = 0.0083144621  # Boltzmann constant in kJ/mol/K
default_ev_to_kjmol = 96.4853365  # conversion factor from eV to kJ/mol


_FesResult = namedtuple('FesResult', ['fes', 'grid', 'extend'])



[docs]
def compute_fes(cvs: np.ndarray, bias: np.ndarray, temp: float, grid=None,
                w=1, kB=default_kB, grid_size=100j):
    """
    Compute free energy surface from biased sampling data using Gaussian KDE
    Support 1D and 2D collective variables

    :param cv: collective variable
    :param bias: bias potential
    :param temp: temperature
    :param grid: grid points, if None, will use grid_size to generate automatically
    :param w: weights
    :param kB: Boltzmann constant
    :param grid_size: grid size in np.mgrid style

    :return: free energy
    """
    if cvs.ndim == 2:
        if cvs.shape[0] == 1:
            cvs = cvs[0]  # flatten
        elif cvs.shape[0] != 2:
            raise ValueError(f'Invalid cvs shape {cvs.shape}, only support 1D or 2D cvs')
    elif cvs.ndim != 1:
        raise ValueError(f'Invalid cvs shape {cvs.shape}, only support 1D or 2D cvs')

    kBT = kB * temp
    beta = 1 / kBT
    weights = w * np.exp(beta * bias)
    kde = gaussian_kde(cvs, weights=weights)

    extend = None
    if cvs.ndim == 1:  # handle 1d
        if grid is None:
            x_min, x_max = cvs.min(), cvs.max()
            grid = np.mgrid[x_min:x_max:grid_size]
            extend = [x_min, x_max]
        pdf = kde.evaluate(grid)
    else:  # handle 2d
        if grid is None:
            x_min, x_max = cvs[0].min(), cvs[0].max()
            y_min, y_max = cvs[1].min(), cvs[1].max()
            grid = np.mgrid[x_min:x_max:grid_size, y_min:y_max:grid_size]
            extend = [x_min, x_max, y_min, y_max]
        X, Y = grid
        pos = np.vstack([X.ravel(), Y.ravel()])
        # TODO: generated by copilot, I am not sure if it is correct
        pdf = kde.evaluate(pos).T.reshape(X.shape)
    fes = -kBT * np.log(pdf)
    # return fes with grid and extend for plotting
    return _FesResult(fes=fes, grid=grid, extend=extend)




[docs]
def compute_kde_weight(baseline_energy: np.ndarray, target_energy: np.ndarray,
                       temp: float, kB=default_kB,
                       ev_to_kjmol=default_ev_to_kjmol,):
    """
    Compute KDE weights for reweighting

    :param baseline_energy: baseline energy in eV, it's the output of DeepMD evaluation
    :param target_energy: target energy in eV, it's the output of DeepMD evaluation
    """
    kBT = kB * temp
    beta = 1 / kBT

    # use relative energy and convert from eV to kJ/mol
    be = (baseline_energy- np.min(baseline_energy)) * ev_to_kjmol
    te = (target_energy- np.min(target_energy)) * ev_to_kjmol
    return np.exp( -beta * (te - be))




[docs]
def load_dp_energy(*path_or_glob: str):
    """
    load dpdata energy data from npy files

    :param path_or_glob: path or glob pattern to locate data path, for example dpdata/**/energy.npy
    """

    files = expand_globs(*path_or_glob, raise_invalid=True)
    energies = []
    for file in files:
        energy = np.load(file)
        energies.append(energy)

    return np.concatenate(energies)




[docs]
class ReweightingTool:

    def __init__(self):
        self.data: Dict[str, np.ndarray] = {}
        self.colvar_df = None


[docs]
    def load_energy(self, *path_or_glob: str, tag: str):
        """
        read baseline data as dpdata.LabeledSystem

        :param path_or_glob: path or glob pattern to locate data path
        :param tag: a string tag to distinguish data, it is suggested to use `baseline` and `target`
        """
        energy = load_dp_energy(path_or_glob)
        if tag in self.data:
            self.data[tag] = np.concatenate([self.data[tag], energy])
        else:
            self.data[tag] = energy
        return self




[docs]
    def load_colvar(self, *path_or_glob: str):
        """
        load PLUMED COLVAR files and concatenate into a single DataFrame
        you have to ensure the COLVAR data is aligned with energies

        :param path_or_glob: path or glob pattern to locate data path
        """

        paths = expand_globs(path_or_glob)
        self.colvar_df = plumed.load_colvar_from_files(*paths)
        return self



[docs]
    def reweighting(self, cv: str, bias: str, temp: float,
                    grid_size=0.01, save_fig_to: Optional[str]=None,
                    save_json_to: Optional[str]=None,
                    baseline_tag='baseline', target_tag='target'):
        """
        run reweighting against loaded data

        :param cv: name of collective variable columns, for example d1
        :param bias: name of bias column, for example opes.bias
        :param temp: temperature
        :param save_to: save th
        """
        import matplotlib.pyplot as plt

        cvs_df, bias_df = plumed.get_cvs_bias_from_df(self.colvar_df, [cv], bias)
        logger.info(f'cvs shape: {cvs_df.shape}, bias shape: {bias_df.shape}')

        # compute baseline fes
        baseline_fes = compute_fes(cvs_df, bias_df, temp=temp, grid_size=grid_size)
        grid = baseline_fes.grid

        # compute weight
        baseline_e = self.data[baseline_tag].reshape((-1, 1))
        target_e = self.data[target_tag].reshape((-1, 1))
        logger.info(f'baseline energy shape: {baseline_e.shape}, target energy shape: {target_e.shape}')

        w = compute_kde_weight(baseline_e, target_e, temp=temp)
        w = w.reshape((-1,))

        # compute target fes
        target_fes = compute_fes(cvs_df, bias_df, temp=temp, grid=grid, w=w)

        # use matplot to draw baseline and target 1D FES on the image
        plt.plot(grid, baseline_fes.fes, label='Baseline FES')
        plt.plot(grid, target_fes.fes, label='Target FES')

        plt.xlabel('CV')
        plt.ylabel('Free Energy/$kJ\cdot mol^{-1}$')

        plt.legend()
        fig = plt.gcf()

        if save_json_to is not None:
            data = {
                'baseline_fes': baseline_fes.fes.tolist(),
                'target_fes': target_fes.fes.tolist(),
                'weight': w.tolist(),
                'grid': grid.tolist(),
                'extend': baseline_fes.extend,
            }
            with open(save_json_to, 'w') as f:
                json.dump(data, f)

        if save_fig_to is None:
            fig.canvas.draw()
            fig.canvas.flush_events()
        else:
            ensure_dir(save_fig_to)
            fig.savefig(save_fig_to, dpi=300, bbox_inches='tight')