Applying scikit-learn standards to custom wrappers

d81da020 · Antoine Guillaume · 0569d929 · d81da020 · d81da020 · d81da020
Commit d81da020 authored Nov 21, 2020 by Antoine Guillaume
6 changed files
--- a/CV_script.py
+++ b/CV_script.py
@@ -31,8 +31,8 @@ from sklearn.preprocessing import MinMaxScaler
 # In[3]:
 #Base path, all necessary folders are supposed to be contained in this one.
-base_path = r"!! REPLACE BY YOUR PATH !!"
+#base_path = r"!! REPLACE BY YOUR PATH !!"
+base_path = r"C:/Utilisateurs/A694772/Documents/ECMLPKDD_datacopy/"
 #Path to the life cycles CSV files.
 dataset_path = base_path+r"datasets/"
@@ -40,7 +40,8 @@ dataset_path = base_path+r"datasets/"
 result_path = base_path+r"results/"
 #If not None, CSV files containing the data used by the TS-CHIEF Java program will be output
-TSCHIEF_path = dataset_path+r"TSCHIEF/"
+#TSCHIEF_path = dataset_path+r"TSCHIEF/"
+TSCHIEF_path = None
 #If True, perform cross validation of all defined pipelines
 do_cross_validation = True
@@ -58,14 +59,14 @@ predictive_padding_hours = 48
 extended_infected_interval_hours = 24
 #Size of the PAA transform output
-size=1000
+size=500
 #Number of cross validation splits
-n_splits=10
+n_splits=2
 # Number of processes to launch in parallel for the cross validation of each pipeline.
 # Set to None if your setup does not allow such speedups.
-n_cv_jobs=-1
+n_cv_jobs=None
 if dataset_path is not None and not exists(dataset_path):
    mkdir(dataset_path)

--- a/README.md
+++ b/README.md
@@ -34,8 +34,6 @@ Configuration parameters are located at the beginning of CV_script, you MUST cha
 To change or check the algorithms' parameters, look at the custom wrapper classes: all parameters are redefined there to avoid errors, and any parameter not specified in the constructor is left at its default value.
 The representation methods are defined inside utils.representations and the classification methods inside utils.classifications.
-ResNet is left commented in the code, so you can run the other algorithms without a Tensorflow installation or a GPU without any impact.
 ## Usage
 Extract the files of the dataset archive located in ~/datasets in the dataset folder
@@ -58,6 +56,25 @@ The runtime of this script is extremely long, one iteration take about 4 hours,
 ```bash
 python TSCHIEF_results_to_csv.py
 ```
+## Note on using sktime-dl for InceptionTime and ResNet
+Both InceptionTime and ResNet are left commented out in the code, so you can run the other algorithms, without any impact, even if you have no TensorFlow installation or GPU.
+Depending on your installation, you might run into errors when feeding TensorFlow models to a scikit-learn cross validation pipeline. Some of those issues can be fixed by making the wrappers for those models, defined in utils.classifications, inherit from the KerasClassifier wrapper provided by TensorFlow.
+To make those two algorithms part of the experiments, you have to uncomment both their declarations in utils.classifications and the associated pipelines in CV_script.
+About InceptionTime: sktime-dl is the deep learning package built by the sktime authors. As it was still in active development at the time of writing, we had to make some modifications to its source code to be able to run InceptionTime.
+Starting from the latest version available on GitHub, we applied the following modifications:
+* Fix import error from sktime utils : In sktime_dl/utils/_data.py, replace :
+```
+from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested (_data.py line 6)
+```
+by
+```
+from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested (_data.py line 6)
+```
+* We also modified InceptionTime to use binary_crossentropy (change the loss name and use a sigmoid layer with 1 neuron as the output) and weighted accuracy for early stopping. This is not mandatory but is better suited to our problem.
 ## Contributing

--- a/utils/__pycache__/classifications.cpython-37.pyc
+++ b/utils/__pycache__/classifications.cpython-37.pyc
--- a/utils/__pycache__/representations.cpython-37.pyc
+++ b/utils/__pycache__/representations.cpython-37.pyc
--- a/utils/classifications.py
+++ b/utils/classifications.py
@@ -10,18 +10,16 @@ from sklearn.linear_model import RidgeClassifier
 from sktime.classification.interval_based import TimeSeriesForest
 from sktime.utils.data_container import concat_nested_arrays as cna
 from sktime.classification.frequency_based import RandomIntervalSpectralForest
+from sklearn.base import BaseEstimator, ClassifierMixin
+# # Define classes for classification methods
-# # Define classes for representation methods
+# Here we define custom classes when necessary for the classification methods we will use inside pipelines during cross validation. 
-# Here we define custom classes when necessary for the representation methods we will use inside pipelines during cross validation. 
 # 
 # See corresponding modules documentation for documentation.
 # 
 # Pyts : https://pyts.readthedocs.io/
-# 
+#
-# MatrixProfile : https://matrixprofile.docs.matrixprofile.org/
-# 
 # sktime : https://sktime.org/index.html
 # 
 # sklearn : https://scikit-learn.org/stable/index.html
@@ -31,10 +29,10 @@ from sktime.classification.frequency_based import RandomIntervalSpectralForest
 # In[10]:
 """
-#This section is left commented so you have no trouble running the script without Tensorflow/GPU
+# This section is left commented so you have no trouble running the script without Tensorflow/GPU
-#If you have error during cross validation, you can try to make the class ResNetV2
+# While using ResNet, if you get errors during cross validation, you can try to make the class ResNetV2
 # inherit from the tensorflow.keras KerasClassifier wrapper; it can fix some issues.
-# Don't forget to uncomment pipelines in CV_scripts aswell.
+# Don't forget to uncomment the pipelines using ResNet in CV_script as well.
 from tensorflow.keras.applications import ResNet50V2
 from tensorflow.keras.optimizers import Adam
@@ -42,7 +40,8 @@ from sklearn.model_selection import train_test_split
 from tensorflow.keras.callbacks import EarlyStopping
 from sklearn.utils.class_weight import compute_class_weight
-class ResNetV2:
+class ResNetV2(BaseEstimator, ClassifierMixin):
    def __init__(self, loss='binary_crossentropy', pooling='avg', optimizer=Adam(lr=1e-4)):
        self.loss = loss
        self.pooling = pooling
@@ -65,9 +64,9 @@ class ResNetV2:
        self.init_model((X.shape[1], X.shape[2], X.shape[3]))
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=val_size)
        el = EarlyStopping(monitor='val_accuracy', patience=el_patience, restore_best_weights=True, mode='max')
-        cw = compute_class_weight('balanced',np.unique(y_train), y_train)
+        cw = compute_class_weight('balanced', np.unique(y_train), y_train)
-        history = self.model.fit(
+        self.model.fit(
            X_train, y_train,
            validation_data=(X_val,y_val),
            epochs=epochs,
@@ -77,35 +76,73 @@ class ResNetV2:
            shuffle=True,
            class_weight={0:cw[0],1:cw[1]}
        )
-        if return_hist:
+        return self
-            return history
    def predict(self, X):
        return np.array([x>0.5 for x in self.model.predict(X)]).astype(int)
-"""
+    def predict_proba(self,X):
+        return self.model.predict(X)
+from sktime_dl.deeplearning.inceptiontime._classifier import InceptionTimeClassifier
-class RISE:
-    def __init__(self, min_length=5, n_estimators=300):
-        self.estimator = RandomIntervalSpectralForest(n_estimators=n_estimators, min_interval=min_length)
+class InceptionTime(BaseEstimator, ClassifierMixin):
+    def __init__(self, depth=18, nb_filters=32, bottleneck_size=32):
+        self.model = None
+        self.depth = depth
+        self.nb_filters = nb_filters
+        self.bottleneck_size = bottleneck_size
+    def fit(self, X, y, epochs=1500, batch_size=32,
+            el_patience=100, verbose=False, val_size=0.1):
+        self.model = InceptionTimeClassifier(verbose=verbose, depth=self.depth,
+                                             nb_filters=self.nb_filters, bottleneck_size=self.bottleneck_size,
+                                             callbacks=[EarlyStopping(monitor='val_accuracy', patience=el_patience,
+                                                                      restore_best_weights=True, mode='max')])
+        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=val_size)
+        self.model.fit(X_train, y_train, validation_X=X_val,validation_y=y_val)
+        return self
+    def predict(self, X):
+        return np.array([x>0.5 for x in self.model.predict(X)]).astype(int)
+    def predict_proba(self,X):
+        return self.model.predict(X)
+"""
+class SktimeEstimator:
    def _sktime_format(self,X,y):
        # X : (n_instance, n_timestamp, n_features)
-        X, y = cna(X.reshape(X.shape[2],X.shape[0],X.shape[1])), np.asarray(y)
+        X, y = self._sktime_format_X(X), np.asarray(y)
        return X, y
-    def set_params(self, **parameters):
-        self.estimator.set_params(**parameters)
-        return self
    def _sktime_format_X(self,X):
        # X : (n_instance, n_timestamp, n_features)
        return cna(X.reshape(X.shape[2],X.shape[0],X.shape[1]))
+class PytsEstimator:
+    def _format(self,X,y):
+        return self._format_X(X), np.asarray(y)
+    def _format_X(self,X):
+        return X.reshape(X.shape[0],X.shape[1])
+class RISE(BaseEstimator, ClassifierMixin, SktimeEstimator):
+    def __init__(self, min_length=5, n_estimators=300):
+        self.min_length = min_length
+        self.n_estimators = n_estimators
+        self.estimator = None
    def fit(self,X,y):
        X, y = self._sktime_format(X,y)
+        self.estimator = RandomIntervalSpectralForest(n_estimators=self.n_estimators, min_interval=self.min_length)
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        X = self._sktime_format_X(X)
@@ -115,19 +152,26 @@ class RISE:
        X = self._sktime_format(X)
        return self.estimator.predict_proba(X)
-class Random_Forest:
+class Random_Forest(BaseEstimator, ClassifierMixin):
    def __init__(self, n_estimators=300, max_depth=None, max_features=0.75, max_samples=0.75,
            ccp_alpha=0.0225, class_weight="balanced_subsample"):
-        self.estimator = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
+        self.n_estimators=n_estimators
-                                                max_features=max_features, max_samples=max_samples,
+        self.max_depth=max_depth
-                                                ccp_alpha=ccp_alpha,class_weight=class_weight)
+        self.max_features=max_features
+        self.max_samples=max_samples
+        self.ccp_alpha=ccp_alpha
+        self.class_weight=class_weight
+        self.estimator = None
-    def set_params(self, **params):
+    def fit(self, X, y):
-        return self.estimator.set_params(**params)
-    def fit(self,X,y):
        X = np.asarray([x.astype(np.float32) for x in X])
+        self.estimator = RandomForestClassifier(n_estimators=self.n_estimators, max_depth=self.max_depth,
+                                                max_features=self.max_features, max_samples=self.max_samples,
+                                                ccp_alpha=self.ccp_alpha,class_weight=self.class_weight)
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        X = np.asarray([x.astype(np.float32) for x in X])
@@ -137,15 +181,17 @@ class Random_Forest:
        X = np.asarray([x.astype(np.float32) for x in X])
        return self.estimator.predict_proba(X)
-class KNN_classif:
+class KNN_classif(BaseEstimator, ClassifierMixin):
    def __init__(self, n_neighbors=9, weights='distance',p=2):
-        self.estimator = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
+        self.n_neighbors = n_neighbors
+        self.weights = weights
-    def set_params(self, **params):
+        self.p = p
-        return self.estimator.set_params(**params)
+        self.estimator = None
-    def fit(self,X,y):       
+    def fit(self,X,y):
+        self.estimator = KNeighborsClassifier(n_neighbors=self.n_neighbors, weights=self.weights, p=self.p)
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        return self.estimator.predict(X)
@@ -153,27 +199,19 @@ class KNN_classif:
    def predict_proba(self,X):
        return self.estimator.predict_proba(X)
-class TimeSeries_Forest:
+class TimeSeries_Forest(BaseEstimator, ClassifierMixin, SktimeEstimator):
    def __init__(self, n_estimators=300,  min_interval=3):
+        self.n_estimators = n_estimators
-        self.estimator = TimeSeriesForest(n_estimators=n_estimators,
+        self.min_interval = min_interval
-                                          min_interval=3) 
+        self.estimator = None
-    def set_params(self, **params):
-        return self.estimator.set_params(**params)
-    def _sktime_format(self,X,y):
-        # X : (n_instance, n_timestamp, n_features)
-        X, y = cna(X.reshape(X.shape[2],X.shape[0],X.shape[1])), np.asarray(y)
-        return X, y
-    def _sktime_format_X(self,X):
-        # X : (n_instance, n_timestamp, n_features)
-        return cna(X.reshape(X.shape[2],X.shape[0],X.shape[1]))
    def fit(self,X,y):
        X, y = self._sktime_format(X,y)
+        self.estimator = TimeSeriesForest(n_estimators=self.n_estimators,
+                                          min_interval=self.min_interval) 
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        X = self._sktime_format_X(X)
@@ -184,16 +222,23 @@ class TimeSeries_Forest:
        return self.estimator.predict_proba(X)
-class SVM_classif:
+class SVM_classif(BaseEstimator, ClassifierMixin):
-    def __init__(self, C=10, kernel='rbf', degree=2, gamma='scale'):
+    def __init__(self, C=10, kernel='rbf', degree=2, gamma='scale',
-        self.estimator = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma,
+                 cache_size=500, class_weight='balanced'):
-                             cache_size=500, class_weight='balanced')
+            self.C = C
+            self.kernel = kernel
-    def set_params(self, **params):
+            self.degree = degree
-        return self.estimator.set_params(**params)
+            self.gamma = gamma
+            self.cache_size = cache_size
+            self.class_weight = class_weight
+            self.estimator = None
-    def fit(self,X,y):       
+    def fit(self,X,y):
+        self.estimator = SVC(C=self.C, kernel=self.kernel, degree=self.degree,
+                             gamma=self.gamma, cache_size=self.cache_size,
+                             class_weight=self.class_weight)
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        return self.estimator.predict(X)
@@ -201,17 +246,26 @@ class SVM_classif:
    def predict_proba(self,X):
        return self.estimator.predict_proba(X)
-class Ridge_classif:
+class Ridge_classif(BaseEstimator, ClassifierMixin):
    def __init__(self, alpha=10.0, normalize=False, copy_X=True, max_iter=None, tol=0.001,
                 class_weight='balanced'):
-        self.estimator = RidgeClassifier(alpha=alpha, normalize=normalize, copy_X=copy_X,
+        self.alpha = alpha
-                                         max_iter=max_iter, tol=tol, class_weight=class_weight)
+        self.normalize = normalize
+        self.copy_X = copy_X
+        self.max_iter = max_iter
+        self.tol = tol
+        self.class_weight = class_weight
+        self.estimator = None
    def set_params(self, **params):
        return self.estimator.set_params(**params)
-    def fit(self,X,y):       
+    def fit(self,X,y):
+        self.estimator = RidgeClassifier(alpha=self.alpha, normalize=self.normalize,
+                                         copy_X=self.copy_X, max_iter=self.max_iter,
+                                         tol=self.tol, class_weight=self.class_weight)
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        return self.estimator.predict(X)
@@ -219,22 +273,19 @@ class Ridge_classif:
    def predict_proba(self,X):
        return self.estimator.predict_proba(X)     
-class KNN_TS_classif:
+class KNN_TS_classif(BaseEstimator, ClassifierMixin, PytsEstimator):
    def __init__(self, n_neighbors=9, weights='distance', p=2):
-        self.estimator = KNeighborsClassifierTS(n_neighbors=n_neighbors, weights=weights, p=p)
+        self.n_neighbors = n_neighbors
+        self.weights = weights
-    def _format(self,X,y):
+        self.p = p
-        return X.reshape(X.shape[0],X.shape[1]), np.asarray(y)
+        self.estimator = None
-    def set_params(self, **params):
-        return self.estimator.set_params(**params)
-    def _format_X(self,X):
-        return X.reshape(X.shape[0],X.shape[1])
    def fit(self,X,y):       
        X, y = self._format(X,y)
+        self.estimator = KNeighborsClassifierTS(n_neighbors=self.n_neighbors,
+                                                weights=self.weights, p=self.p)
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        X = self._format_X(X)
@@ -245,29 +296,32 @@ class KNN_TS_classif:
        return self.estimator.predict_proba(X)
-class BOSSVS_classif:
+class BOSSVS_classif(BaseEstimator, ClassifierMixin, PytsEstimator):
    def __init__(self, word_size=9, n_bins=7, window_size=0.2, window_step=1,
                 anova=True, drop_sum=False, norm_mean=False, norm_std=False,
                 strategy='uniform', alphabet=None):
-        self.estimator = BOSSVS(word_size=word_size, n_bins=n_bins,
+        self.word_size = word_size
-                                window_size=window_size, window_step=window_step,
+        self.n_bins = n_bins
-                                anova=anova, drop_sum=drop_sum,
+        self.window_size = window_size
-                                norm_mean=norm_mean, norm_std=norm_std,
+        self.window_step = window_step
-                                strategy=strategy, alphabet=alphabet)
+        self.anova = anova
-    def set_params(self, **params):
+        self.drop_sum = drop_sum
-        return self.estimator.set_params(**params)
+        self.norm_mean = norm_mean
+        self.norm_std = norm_std
+        self.strategy = strategy
+        self.alphabet = alphabet
+        self.estimator = None
-    def _format(self,X,y):
+    def fit(self,X,y):        
-        # X : (n_instance, n_timestamp, n_features)
-        return X.reshape(X.shape[0],X.shape[1]), np.asarray(y)
-    def _format_X(self,X):
-        # X : (n_instance, n_timestamp, n_features)
-        return X.reshape(X.shape[0],X.shape[1])
-    def fit(self,X,y):       
        X, y = self._format(X,y)
+        self.estimator = BOSSVS(word_size=self.word_size, n_bins=self.n_bins,
+                                window_size=self.window_size, window_step=self.window_step,
+                                anova=self.anova, drop_sum=self.drop_sum,
+                                norm_mean=self.norm_mean, norm_std=self.norm_std,
+                                strategy=self.strategy, alphabet=self.alphabet)
        self.estimator.fit(X,y)
+        return self
    def predict(self,X):
        X = self._format_X(X)

--- a/utils/representations.py
+++ b/utils/representations.py
@@ -9,6 +9,7 @@ from pyts.approximation import PiecewiseAggregateApproximation, SymbolicAggregat
 from pyts.transformation import ROCKET
 from matplotlib import pyplot as plt
+from sklearn.base import BaseEstimator, TransformerMixin
 # # Define classes for representation methods
 # Here we define custom classes when necessary for the representation methods we will use inside pipelines during cross validation. 
@@ -18,25 +19,21 @@ from matplotlib import pyplot as plt
 # Pyts : https://pyts.readthedocs.io/
 # 
 # MatrixProfile : https://matrixprofile.docs.matrixprofile.org/
-# 
-# sktime : https://sktime.org/index.html
-# 
-# sklearn : https://scikit-learn.org/stable/index.html
 # In[2]:
 #Gramian natively uses PAA, recurrence doesn't,
 #that's why you'll see calls to PAA inside the Recurrence class but not in the Gramian one
-class Gramian_transform:
+class Gramian_transform(BaseEstimator, TransformerMixin):
    def __init__(self, img_size=128, flatten=False, method='s'):
        self.img_size = img_size
-        self.flatten=flatten
+        self.flatten = flatten
+        self.method = method
        self.cmap = plt.get_cmap('jet')
-        self.transformer = GramianAngularField(image_size=img_size,
+        self.transformer = None
-                                               method=method,
-                                               flatten=flatten)
+    def transform(self, X, y=None):
-    def transform(self,X):
        if type(X[0]) == pd.core.series.Series:
            X = np.asarray([x.values for x in X])
@@ -48,24 +45,22 @@ class Gramian_transform:
            X = self.cmap(X)[:,:,:,:,0:3].reshape(X.shape[0],self.img_size, self.img_size,3)
        return X
-    def set_params(self, **params):
+    def fit(self, X, y=None):
-        return self.transformer.set_params(**params)
+        self.transformer = GramianAngularField(image_size=self.img_size,
+                                               method=self.method,
+                                               flatten=self.flatten)
+        return self
-    def fit_transform(self,X,y):
+class Recurrence_transform(BaseEstimator, TransformerMixin):
-        return self.transform(X)
-class Recurrence_transform:
    def __init__(self, output_size=128, dimension=1, time_delay=6, flatten=False):
        self.output_size = output_size
        self.flatten=flatten
+        self.dimension = dimension
+        self.time_delay = time_delay
        self.cmap = plt.get_cmap('jet')
-        self.approximator = PiecewiseAggregateApproximation(output_size=output_size,
+        self.transformer = None
-                                                                  window_size=None, 
-                                                                  overlapping=False)
+    def transform(self, X, y=None):
-        self.transformer = RecurrencePlot(dimension=dimension,
-                                          time_delay=time_delay,
-                                          flatten=flatten)
-    def transform(self,X):
        if type(X[0]) == pd.core.series.Series:
            X = np.asarray([x.values for x in X])
@@ -78,27 +73,23 @@ class Recurrence_transform:
            X = self.cmap(X)[:,:,:,:,0:3].reshape(X.shape[0],self.output_size, self.output_size,3)
        return X
-    def set_params(self, **parameters):
+    def fit(self, X, y=None):
-        for parameter, value in parameters.items():
+        self.approximator = PiecewiseAggregateApproximation(output_size=self.output_size,
-            if parameter == 'output_size':
+                                                                  window_size=None, 
-                self.approximator.set_params(**{parameter: value})
+                                                                  overlapping=False)
-                setattr(self, parameter, value)
+        self.transformer = RecurrencePlot(dimension=self.dimension,
-            elif parameter in ['dimension','time_delay']:
+                                          time_delay=self.time_delay,
-                self.transformer.set_params(**{parameter: value})
+                                          flatten=self.flatten)
-            else:
-                setattr(self, parameter, value)
        return self
-    def fit_transform(self,X,y):
+class PiecewiseApproximation_transform(BaseEstimator, TransformerMixin):
-        return self.transform(X)
-class PiecewiseApproximation_transform:
    def __init__(self, output_size=1000, overlapping=False, window_size=None):
        self.output_size = output_size
-        self.transformer = PiecewiseAggregateApproximation(output_size=output_size, 
+        self.overlapping = overlapping
-                                                           window_size=window_size,
+        self.window_size = window_size
-                                                           overlapping=overlapping)
+        self.transformer = None
-    def transform(self,X):
+    def transform(self, X, y=None):
        if type(X[0]) == pd.core.series.Series:
            X = np.asarray([x.values for x in X])
@@ -106,77 +97,79 @@ class PiecewiseApproximation_transform:
        X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
        return X
-    def set_params(self, **params):
+    def fit(self, X, y=None):
-        return self.transformer.set_params(**params)
+        self.transformer = PiecewiseAggregateApproximation(output_size=self.output_size, 
+                                                           window_size=self.window_size,
-    def fit_transform(self,X,y):
+                                                           overlapping=self.overlapping)
-        return self.transform(X)
+        return self
-class SymbolicAggregate_transform:
+class SymbolicAggregate_transform(BaseEstimator, TransformerMixin):
    def __init__(self, n_bins=7, strategy='uniform', alphabet='ordinal'):
-        self.transformer = SymbolicAggregateApproximation(n_bins=n_bins, strategy=strategy,
+        self.n_bins = n_bins
-                                                          alphabet=alphabet)
+        self.strategy = strategy
+        self.alphabet = alphabet
+        self.transformer = None
-    def set_params(self, **params):
+    def transform(self, X, y=None):
-        return self.transformer.set_params(**params)
-    def transform(self, X):
        X = np.asarray([self.transformer.transform(x.reshape(1,-1)).astype(float) if np.max(x) - np.min(x) != 0 else np.zeros((1,x.shape[0])) for x in X])
        X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
        return X
-    def fit_transform(self,X,y):
+    def fit(self, X, y=None):
-        return self.transform(X)
+        self.transformer = SymbolicAggregateApproximation(n_bins=self.n_bins,
+                                                          strategy=self.strategy,
-class SymbolicFourrier_transform:
+                                                          alphabet=self.alphabet)
+        return self
+class SymbolicFourrier_transform(BaseEstimator, TransformerMixin):
    def __init__(self, n_coefs=20, n_bins=7, strategy='uniform', drop_sum=False,
                 anova=True, norm_mean=True, norm_std=False, alphabet='ordinal'):
-        self.transformer = SymbolicFourierApproximation(n_coefs=n_coefs, n_bins=n_bins,
+        self.n_coefs = n_coefs
-                                                        strategy=strategy, alphabet=alphabet,
+        self.n_bins = n_bins
-                                                        drop_sum=drop_sum, anova=anova,
+        self.strategy = strategy
-                                                        norm_mean=norm_mean, norm_std=norm_std)
+        self.alphabet = alphabet
-    def transform(self,X):
+        self.drop_sum = drop_sum
+        self.anova = anova
+        self.norm_mean = norm_mean
+        self.norm_std = norm_std
+        self.transformer = None
+    def transform(self, X, y=None):
        X = np.asarray([self.transformer.transform(x.reshape(1,-1)).astype(float) if np.max(x) - np.min(x) != 0 else np.zeros((1,x.shape[0])) for x in X])         
        X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
        return X
-    def set_params(self, **params):
+    def fit(self, X, y=None):
-        return self.transformer.set_params(**params)
+        self.transformer = SymbolicFourierApproximation(n_coefs=self.n_coefs, n_bins=self.n_bins,
+                                                        strategy=self.strategy, alphabet=self.alphabet,
-    def fit_transform(self,X,y):
+                                                        drop_sum=self.drop_sum, anova=self.anova,
-        X = X.reshape(X.shape[0],X.shape[1])
+                                                        norm_mean=self.norm_mean, norm_std=self.norm_std)
-        self.transformer.fit(X,y)
+        return self
-        return self.transform(X)
-class MatrixProfile_transform:
+class MatrixProfile_transform(BaseEstimator, TransformerMixin):
    def __init__(self, window_size=0.075):
        self._window_size=window_size
-    def transform(self, X):
+    def transform(self, X, y=None):
        if type(X[0]) == pd.core.series.Series:
            X = np.asarray([x.values for x in X])
        X = np.asarray([mp.compute(x.reshape(-1),windows=x.shape[0]*self._window_size)['mp'].reshape(1,-1) for x in X])        
        X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
        return X
-    def fit_transform(self,X,y):
+    def fit(self, X, y=None):
-        return self.transform(X)
-    def set_params(self, **parameters):
-        for parameter, value in parameters.items():
-            setattr(self, parameter, value)
        return self
-class ROCKET_transform:
+class ROCKET_transform(BaseEstimator, TransformerMixin):
    def __init__(self, n_kernels=15000, kernel_sizes=(5,7,9), flatten=False):
        self.flatten = flatten
-        self.transformer = ROCKET(n_kernels=n_kernels, kernel_sizes=kernel_sizes)
+        self.n_kernels = n_kernels
+        self.kernel_sizes = kernel_sizes
-    def set_params(self, **params):
+        self.transformer = None
-        return self.transformer.set_params(**params)
+    def transform(self, X, y=None):
-    def transform(self,X):
        X = X.reshape(X.shape[0],X.shape[1])
        X = self.transformer.transform(X)
        if self.flatten:
@@ -185,8 +178,8 @@ class ROCKET_transform:
            X = X.reshape(X.shape[0], X.shape[1], 1)
        return X
-    def fit_transform(self,X,y):
+    def fit(self, X, y=None):
+        self.transformer = ROCKET(n_kernels=self.n_kernels, kernel_sizes=self.kernel_sizes)
        X = X.reshape(X.shape[0],X.shape[1])
        self.transformer.fit(X)
-        return self.transform(X)
+        return self