# Source code for hcrystalball.preprocessing._endog_transformer
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
class TargetTransformer(TransformerMixin, BaseEstimator):
    """Enable transformation of the target.

    Wrapper for applying an estimator to a transformed version of the target y
    and automatically transforming back predictions.

    Parameters
    ----------
    estimator : Any
        Wrapped estimator. Its ``fit``, ``predict``, ``transform`` and ``score``
        methods are delegated to. If it exposes a ``steps`` attribute, that
        value is stored on this wrapper as ``steps`` at construction time.
    y_transformer : Any
        Transformer applied to the target. Its ``fit_transform``, ``transform``
        and ``inverse_transform`` methods are called with 2D input.
    omit_inverse_transformation : bool
        If True, predictions are returned as produced by the estimator,
        without applying ``y_transformer.inverse_transform``.
    """

    def __init__(self, estimator, y_transformer, omit_inverse_transformation=False):
        self.estimator = estimator
        self.y_transformer = y_transformer
        self.omit_inverse_transformation = omit_inverse_transformation
        # Snapshot of the estimator's steps taken at construction time;
        # None when the estimator does not expose ``steps``.
        self.steps = getattr(self.estimator, "steps", None)

    def _reshape_2d(self, y):
        """Ensure the target is two-dimensional.

        Parameters
        ----------
        y : numpy.ndarray or pandas.Series
            Target values.

        Returns
        -------
        numpy.ndarray
            Column vector of shape (n, 1) when ``y`` is 1D,
            otherwise ``y`` unchanged.
        """
        if y.ndim != 1:
            return y
        # pandas.Series has no ``reshape``; go through the underlying array
        values = y.values if isinstance(y, pd.Series) else y
        return values.reshape(-1, 1)

    def _restore_shape(self, y):
        """Restore a 1D shape from a single-column 2D array.

        Parameters
        ----------
        y : numpy.ndarray
            Target values.

        Returns
        -------
        numpy.ndarray
            ``y`` squeezed along axis 1 when it is 2D with exactly one column,
            otherwise ``y`` unchanged.
        """
        if y.ndim == 2 and y.shape[1] == 1:
            return y.squeeze(axis=1)
        return y

    def fit(self, X, y=None):
        """Fit after reshaping and rescaling the target.

        Reshape target to 2d, call fit_transform on 2d, return to 1d form
        and fit estimator on transformed target.

        Parameters
        ----------
        X : Any
            Input features, passed unchanged to ``estimator.fit``.
        y : numpy.ndarray
            Target values. Required despite the ``None`` default.

        Returns
        -------
        `TargetTransformer`
            Fitted target transformer

        Raises
        ------
        ValueError
            If ``y`` is None.
        """
        if y is None:
            raise ValueError("TargetTransformer.fit requires the target `y`, but None was passed.")

        # remembered so that predict/score can return to the training dimensionality
        self._training_dim = y.ndim

        # things are made more complicated by the fact that sklearn
        # transformers expect 2D arrays. Thus need to reshape
        y_2d = self._reshape_2d(y)
        y_t = self.y_transformer.fit_transform(y_2d, y)

        # restore 1D if necessary
        y_t = self._restore_shape(y_t)

        # fit estimator on transformed target
        self.estimator.fit(X, y_t)
        return self

    def transform(self, X, y=None):
        """Transform the features with the wrapped estimator.

        Parameters
        ----------
        X : numpy.ndarray
            Input features.
        y : Any
            Ignored.

        Returns
        -------
        numpy.ndarray
            Result of estimator transform
        """
        return self.estimator.transform(X)

    def _predict(self, X, y=None):
        """Predict with the estimator and undo the target transformation.

        Parameters
        ----------
        X : Any
            Input features, passed to ``estimator.predict``.
        y : Any
            Ignored.

        Returns
        -------
        Any
            Raw estimator predictions when ``omit_inverse_transformation`` is
            set, otherwise the inverse-transformed predictions (squeezed to 1D
            when the training target was 1D).
        """
        pred_t = self.estimator.predict(X)
        if self.omit_inverse_transformation:
            return pred_t

        # again, transformer expects 2D input for doing inverse transform
        pred = self.y_transformer.inverse_transform(self._reshape_2d(pred_t))

        # if output is expected to be 1D, squeeze if necessary
        if self._training_dim == 1:
            return self._restore_shape(pred)
        return pred

    def predict(self, X, y=None):
        """Ensure correct estimator.predict with scaled target values.

        Parameters
        ----------
        X : pandas.DataFrame
            Input features; its index becomes the index of the result.
        y : Any
            Ignored.

        Returns
        -------
        pandas.DataFrame
            Predictions in a single column named after the estimator
            (the last step's ``name`` when the estimator is a Pipeline).
        """
        preds = self._predict(X, y)
        name = (
            self.estimator.steps[-1][1].name if isinstance(self.estimator, Pipeline) else self.estimator.name
        )
        return pd.DataFrame(preds, index=X.index, columns=[name])

    def score(self, X, y=None):
        """Ensure correct estimator.score with scaled target values.

        Parameters
        ----------
        X : numpy.ndarray
            Input features.
        y : numpy.ndarray
            Target values. Required despite the ``None`` default.

        Returns
        -------
        Any
            Results of estimators score function

        Raises
        ------
        ValueError
            If ``y`` is None.
        """
        if y is None:
            raise ValueError("TargetTransformer.score requires the target `y`, but None was passed.")

        y_2d = self._reshape_2d(y)
        y_t = self.y_transformer.transform(y_2d)
        # score against the same dimensionality the estimator was trained on
        if self._training_dim == 1:
            y_t = self._restore_shape(y_t)
        return self.estimator.score(X, y_t)

    def named_steps(self):
        """Provide access to named steps for `~sklearn.pipeline.Pipeline`.

        Returns
        -------
        dict
            Dictionary of steps; empty when the wrapped estimator exposed no
            ``steps`` at construction time.
        """
        if self.steps is None:
            # estimator is not pipeline-like; dict(None) would raise TypeError
            return {}
        return dict(self.steps)