Source code for mlc_func.ml.ensemble_network

from .force_network import Force_Network, load_force_model
import numpy as np
import copy

[docs]class Ensemble_Network(): """Ensemble Network to obtain confidence intervals for predictions Parameters ----------- network: Network root network from which to create ensemble n: int ensemble size """ def __init__(self, network, n = 3): self.models = [] for _ in range(n): self.models.append(copy.copy(network)) if isinstance(network.X_train, np.ndarray): X_train, y_train, self.X_test, self.y_test =\ network.X_train, network.y_train, network.X_test, network.y_test self.X_valid = network.X_valid self.y_valid = network.y_valid for i, _ in enumerate(self.models): self.models[i].X_train = X_train[i::3] self.models[i].y_train = y_train[i::3] self.trained = [False, False, False] self.model_pointer = 0
[docs] def train(self, idx): """ Train model specified by idx Parameters -------- idx: int ensemble index """ self.trained[idx] = True self.models[idx].train(step_size = self.models[idx].learning_rate, b = self.models[idx].b, restart = True)
[docs] def train_next(self): """ Train the next untrained model """ cycle_cnt = 0 while(True): cycle_cnt += 1 idx = self.model_pointer self.model_pointer += 1 print('Training model: {}'.format(idx)) if self.trained[idx]: print('Model already trained') if cycle_cnt == 3: return 0 continue else: break self.trained[idx] = True self.models[idx].train(step_size = self.models[idx].learning_rate, b = self.models[idx].b, restart = True)
[docs] def predict(self, feat, processed = False): """ Get mean prediction across ensemble Parameters ---------- feat: np.ndarray input features processed: bool are features processed (scaled, masked)? Returns ------- np.ndarray mean prediction """ if not np.alltrue(self.trained): raise Exception('Not all models trained. Call train_next()') predictions = [] for m in self.models: predictions.append(m.predict(feat, processed)) return np.mean(np.array(predictions), axis = 0)
[docs] def predict_from_hdf5(self, path): """ Get mean prediction across ensemble but instead of providing features, give source path where features are found Parameters ---------- path: str path to .hdf5 file containing features Returns ------- np.ndarray mean prediction """ if not np.alltrue(self.trained): raise Exception('Not all models trained. Call train_next()') predictions = [] for m in self.models: predictions.append(m.predict_from_hdf5(path)) return np.mean(np.array(predictions), axis = 0)
[docs] def save(self, net_dir, override = False): """ Save ensemble Network Parameters ---------- net_dir: str directory where to save network override: bool if network already exists allow to override? default = False """ if net_dir[-1] == '/': net_dir = net_dir[:-1] for i, model in enumerate(self.models): model.save_all(net_dir + '/ensemble_{}'.format(i+1), override)
[docs] def std_predict(self, feat, processed = False): """ Get standard deviation of predictions across ensemble Parameters ---------- feat: np.ndarray input features processed: bool are features processed (scaled, masked)? Returns ------- np.ndarray std of predictions """ if not np.alltrue(self.trained): raise Exception('Not all models trained. Call train_next()') predictions = [] for m in self.models: predictions.append(m.predict(feat, processed)) std = np.std(np.array(predictions), axis = 0) return std
[docs]def load_ensemble_network(net_dir, n, species): """Loads an ensemble Network Parameters ---------- net_dir: str directory in which network is stored n: int number of networks per ensemble species: str which chem. element to load for Returns: ------- Ensemble_Network """ if net_dir[-1] == '/': net_dir = net_dir[:-1] if n > 0: model = load_force_model(net_dir + '/ensemble_1/', species) ensemble = Ensemble_Network(model, n) for i in range(n): ensemble.models[i] = load_force_model(net_dir + '/ensemble_{}/'.format(i+1), species) ensemble.trained[i] = [True] return ensemble