Commit 76d6cde7 authored by Mark Hymers's avatar Mark Hymers

Merge branch '34-pca-functions' into 'master'

Resolve "PCA functions"

Closes #34

See merge request !32
parents 0fe1f4ae e1e186cc
Pipeline #25372 passed with stage
in 1 minute and 58 seconds
......@@ -8,6 +8,7 @@ which has its own page:
:maxdepth: 2
......@@ -19,6 +19,8 @@ Model fitting helper functions
In addition to basic model fitting routines, some helper
functions exist; for instance to help with fitting a number of
models to a data series following a "sliding window" pattern.
models to a data series following a "sliding window" pattern
or by applying PCA before the fit.
.. autofunction:: sails.modelfit.sliding_window_fit
.. autofunction:: sails.modelfit.pca_reduced_fit
Preprocessing Functions
.. autoclass:: sails.utils.PCA
......@@ -573,6 +573,74 @@ def sliding_window_fit(model_class, data, delay_vect,
def pca_reduced_fit(X, delay_vect, ndim, linear_model=VieiraMorfLinearModel):
Helper for computing an MVAR on dimensionality reduced data (using PCA).
Returns a
model fitted to reduced data, the reduced model projected back to original
data dimensions and the pca object used for reduction
X : ndarray
The data to compute the reduced model fit on
delay_vect : ndarray
A vector of lags specifying which lags to fit
ndim : int
Number of components to reduce data to.
linear_model : class
Subclass of AbstractLinearModel to use for fit.
Defaults to VieiraMorfLinearModel.
(Default value = True)
red_model : AbstractLinearModel
An instance of the linear model passed into the function containing MVAR
parameters for all windows for the reduced model.
proj_model : AbstractLinearModel
An instance of the linear model passed into the function containing MVAR
parameters for all windows for the projected model.
pc : sails.utils.PCA
PCA class with information about the PCA projection included
# PCA requires [samples x channels] whereas data is [channels x samples]
from .utils import PCA
pc = PCA(X.T, ndim)
Xred = pc.scores.T
Xred = Xred[:, :, None]
# Fit MVAR to dim-reduced data
red_model = linear_model.fit_model(Xred, delay_vect)
# Project back to data dimensions
Afull = np.zeros((X.shape[0], X.shape[0], len(delay_vect)))
# Don't project identity at start
Afull[:, :, 0] = -np.eye(X.shape[0])
for ii in range(1, len(delay_vect)):
Afull[:, :, ii] =, red_model.parameters[:, :, ii]).dot(pc.components)
resid_cov_full =, red_model.resid_cov[:, :]).dot(pc.components)[:, :, None]
# Store projected model
proj_model = linear_model()
proj_model.parameters = Afull
proj_model.resid_cov = resid_cov_full
proj_model.delay_vect = delay_vect
return red_model, proj_model, pc
def coloured_noise_fit(X, model_order):
"""A helper function to fit an autoregressive model whose parameters are
constrained to be coloured noise as definede by [Kasdin1995]_.
......@@ -6,6 +6,8 @@ import math
import numpy as np
from scipy import signal, stats
from .anam import AbstractAnam, register_class
__all__ = []
......@@ -334,3 +336,88 @@ def detect_artefacts(X, reject_dim='channels', reject_metric='std',
elif reject_dim == 'trials':
out[:, :, rm_ind]
return out
class PCA(AbstractAnam):
Class for handling PCA-based reduction before fitting a model
hdf5_outputs = ['npcs', 'data_mean', '_U', '_s', '_VT',
'explained_variance', 'components', 'loadings', 'scores']
def __init__(self, data=None, npcs=None):
# Need to cope with no-argument initialisation for anamnesis
if data is not None:
self.data_mean = data.mean(axis=0)
self._pca_svd(data, npcs)
def _pca_svd(self, data, npcs=10):
Compute a Principal Components Analysis on a given dataset using the SVD
X : ndarray
Input data of shape [nsamples x nfeatures]. The second dimension is
reduced by the PCA.
scores : ndarray [nsamples x npcs]
The dimensionality-reduced data. This contains the value of each
observation in the new PCA-space.
components : ndarray [npcs x nfeatures]
The eigenvectors describing how each feature (node or connection) loads
onto the latent PCA dimensions.
loadings : ndarray [npcs x nfeatures]
The components scaled by their contribution to the variance in the
original data
explained_variance_ratio_ : ndarray [npcs]
The proportion of variance explained by each PC
self.npcs = npcs
# Demean observations
self.data_mean = data.mean(axis=0)[None, :]
# compute
self._U, self._s, self._VT = np.linalg.svd(data - self.data_mean, full_matrices=False)
# Variance explained metrics
var = self._s ** 2 / (data.shape[0]-1)
self.explained_variance_ = var[:npcs]
self.explained_variance_ratio_ = var[:npcs] / var.sum()
# The weights for each original variable in each principal component
self.components = self._VT[:npcs, :] # Eigenvectors
self.loadings = self.components * self._s[:npcs, None] # Scaled by variance contributed to data
# The new co-ordinates of each observation in PCA-space
self.scores = self._s[:npcs] * self._U[:, :npcs]
# Check that component self.scores are properly decorrelated
C = np.corrcoef(self.scores.T)
assert(np.sum(np.abs(C - np.diag(np.diag(C)))) < 1e-10)
except AssertionError:
print('Observations are not properly de-correlated by PCA. Data demeaning might have gone wrong.')
def project_score(self, scores):
Compute a projection of a set of scores back to original data space
return self.data_mean +, self.components)
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment