Source code for ai2_kit.algorithm.reweighting
from typing import Optional, Dict
from collections import namedtuple
from scipy.stats import gaussian_kde
import numpy as np
import json
from ai2_kit.core.util import expand_globs, ensure_dir
from ai2_kit.core.log import get_logger
from ai2_kit.lib import plumed
logger = get_logger(__name__)
default_kB = 0.0083144621 # Boltzmann constant in kJ/mol/K
default_ev_to_kjmol = 96.4853365 # conversion factor from eV to kJ/mol
_FesResult = namedtuple('FesResult', ['fes', 'grid', 'extend'])
[docs]
def compute_fes(cvs: np.ndarray, bias: np.ndarray, temp: float, grid=None,
w=1, kB=default_kB, grid_size=100j):
"""
Compute free energy surface from biased sampling data using Gaussian KDE
Support 1D and 2D collective variables
:param cv: collective variable
:param bias: bias potential
:param temp: temperature
:param grid: grid points, if None, will use grid_size to generate automatically
:param w: weights
:param kB: Boltzmann constant
:param grid_size: grid size in np.mgrid style
:return: free energy
"""
if cvs.ndim == 2:
if cvs.shape[0] == 1:
cvs = cvs[0] # flatten
elif cvs.shape[0] != 2:
raise ValueError(f'Invalid cvs shape {cvs.shape}, only support 1D or 2D cvs')
elif cvs.ndim != 1:
raise ValueError(f'Invalid cvs shape {cvs.shape}, only support 1D or 2D cvs')
kBT = kB * temp
beta = 1 / kBT
weights = w * np.exp(beta * bias)
kde = gaussian_kde(cvs, weights=weights)
extend = None
if cvs.ndim == 1: # handle 1d
if grid is None:
x_min, x_max = cvs.min(), cvs.max()
grid = np.mgrid[x_min:x_max:grid_size]
extend = [x_min, x_max]
pdf = kde.evaluate(grid)
else: # handle 2d
if grid is None:
x_min, x_max = cvs[0].min(), cvs[0].max()
y_min, y_max = cvs[1].min(), cvs[1].max()
grid = np.mgrid[x_min:x_max:grid_size, y_min:y_max:grid_size]
extend = [x_min, x_max, y_min, y_max]
X, Y = grid
pos = np.vstack([X.ravel(), Y.ravel()])
# TODO: generated by copilot, I am not sure if it is correct
pdf = kde.evaluate(pos).T.reshape(X.shape)
fes = -kBT * np.log(pdf)
# return fes with grid and extend for plotting
return _FesResult(fes=fes, grid=grid, extend=extend)
[docs]
def compute_kde_weight(baseline_energy: np.ndarray, target_energy: np.ndarray,
temp: float, kB=default_kB,
ev_to_kjmol=default_ev_to_kjmol,):
"""
Compute KDE weights for reweighting
:param baseline_energy: baseline energy in eV, it's the output of DeepMD evaluation
:param target_energy: target energy in eV, it's the output of DeepMD evaluation
"""
kBT = kB * temp
beta = 1 / kBT
# use relative energy and convert from eV to kJ/mol
be = (baseline_energy- np.min(baseline_energy)) * ev_to_kjmol
te = (target_energy- np.min(target_energy)) * ev_to_kjmol
return np.exp( -beta * (te - be))
[docs]
def load_dp_energy(*path_or_glob: str):
"""
load dpdata energy data from npy files
:param path_or_glob: path or glob pattern to locate data path, for example dpdata/**/energy.npy
"""
files = expand_globs(*path_or_glob, raise_invalid=True)
energies = []
for file in files:
energy = np.load(file)
energies.append(energy)
return np.concatenate(energies)
[docs]
class ReweightingTool:
def __init__(self):
self.data: Dict[str, np.ndarray] = {}
self.colvar_df = None
[docs]
def load_energy(self, *path_or_glob: str, tag: str):
"""
read baseline data as dpdata.LabeledSystem
:param path_or_glob: path or glob pattern to locate data path
:param tag: a string tag to distinguish data, it is suggested to use `baseline` and `target`
"""
energy = load_dp_energy(path_or_glob)
if tag in self.data:
self.data[tag] = np.concatenate([self.data[tag], energy])
else:
self.data[tag] = energy
return self
[docs]
def load_colvar(self, *path_or_glob: str):
"""
load PLUMED COLVAR files and concatenate into a single DataFrame
you have to ensure the COLVAR data is aligned with energies
:param path_or_glob: path or glob pattern to locate data path
"""
paths = expand_globs(path_or_glob)
self.colvar_df = plumed.load_colvar_from_files(*paths)
return self
[docs]
def reweighting(self, cv: str, bias: str, temp: float,
grid_size=0.01, save_fig_to: Optional[str]=None,
save_json_to: Optional[str]=None,
baseline_tag='baseline', target_tag='target'):
"""
run reweighting against loaded data
:param cv: name of collective variable columns, for example d1
:param bias: name of bias column, for example opes.bias
:param temp: temperature
:param save_to: save th
"""
import matplotlib.pyplot as plt
cvs_df, bias_df = plumed.get_cvs_bias_from_df(self.colvar_df, [cv], bias)
logger.info(f'cvs shape: {cvs_df.shape}, bias shape: {bias_df.shape}')
# compute baseline fes
baseline_fes = compute_fes(cvs_df, bias_df, temp=temp, grid_size=grid_size)
grid = baseline_fes.grid
# compute weight
baseline_e = self.data[baseline_tag].reshape((-1, 1))
target_e = self.data[target_tag].reshape((-1, 1))
logger.info(f'baseline energy shape: {baseline_e.shape}, target energy shape: {target_e.shape}')
w = compute_kde_weight(baseline_e, target_e, temp=temp)
w = w.reshape((-1,))
# compute target fes
target_fes = compute_fes(cvs_df, bias_df, temp=temp, grid=grid, w=w)
# use matplot to draw baseline and target 1D FES on the image
plt.plot(grid, baseline_fes.fes, label='Baseline FES')
plt.plot(grid, target_fes.fes, label='Target FES')
plt.xlabel('CV')
plt.ylabel('Free Energy/$kJ\cdot mol^{-1}$')
plt.legend()
fig = plt.gcf()
if save_json_to is not None:
data = {
'baseline_fes': baseline_fes.fes.tolist(),
'target_fes': target_fes.fes.tolist(),
'weight': w.tolist(),
'grid': grid.tolist(),
'extend': baseline_fes.extend,
}
with open(save_json_to, 'w') as f:
json.dump(data, f)
if save_fig_to is None:
fig.canvas.draw()
fig.canvas.flush_events()
else:
ensure_dir(save_fig_to)
fig.savefig(save_fig_to, dpi=300, bbox_inches='tight')