Fix of some sklearn interfaces

e3496cd6 · Antoine Guillaume · d81da020 · e3496cd6 · e3496cd6 · e3496cd6
Commit e3496cd6 authored Nov 21, 2020 by Antoine Guillaume
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 14 deletions

CV_script.py CV_script.py +5 -6

README.md README.md +3 -3

classifications.py utils/classifications.py +1 -0

representations.py utils/representations.py +12 -5

No files found.
--- a/CV_script.py
+++ b/CV_script.py
@@ -31,8 +31,8 @@ from sklearn.preprocessing import MinMaxScaler
 # In[3]:

 #Base path, all necessary folders are supposed to be contained in this one.
-#base_path = r"!! REPLACE BY YOUR PATH !!"
-base_path = r"C:/Utilisateurs/A694772/Documents/ECMLPKDD_datacopy/"
+base_path = r"!! REPLACE BY YOUR PATH !!"
+
 #Path to the life cycles CSV files.
 dataset_path = base_path+r"datasets/"

@@ -40,8 +40,7 @@ dataset_path = base_path+r"datasets/"
 result_path = base_path+r"results/"

 #If not None, CSV files containing data used by the TS-CHIEF java program will be output
-#TSCHIEF_path = dataset_path+r"TSCHIEF/"
-TSCHIEF_path = None
+TSCHIEF_path = dataset_path+r"TSCHIEF/"

 #If True, perform cross validation of all defined pipelines
 do_cross_validation = True
@@ -59,10 +58,10 @@ predictive_padding_hours = 48
 extended_infected_interval_hours = 24

 #Size of the PAA transform output
-size=500
+size=1000

 #Number of cross validation splits
-n_splits=2
+n_splits=10

 # Number of processes to launch in parallel for cross validation of each pipeline.
 # Set to None if you don't have the setup to allow such speedups. 

--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ The experiment were conducted with python 3.8, the following packages are requir
 * sktime
 * pandas

-If you wish to run ResNet for images classification, you will also need Tensorflow 2.x.
+If you wish to run ResNet for image classification, you will also need TensorFlow 2.x, and sktime-dl for InceptionTime.

 ## How to get the ATM dataset
 The ATM dataset being a property of equensWorldline, you must first send an email to "intellectual-property-team-worldline@worldline.com" and "antoine.guillaume@equensworldline.com" to ask for authorization. 
@@ -67,11 +67,11 @@ From the latest version available on github we applied the following modificatio

 * Fix import error from sktime utils : In sktime_dl/utils/_data.py, replace :
 ```
-from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested (_data.py line 6)
+from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested
 ```
 by
 ```
-from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested (_data.py line 6)
+from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested
 ```

 * We also modified InceptionTime to use binary_crossentropy (change the loss name and use a sigmoid layer with 1 neuron as the output) and weighted accuracy for early stopping. This is not mandatory but is more suited to our problem.

--- a/utils/classifications.py
+++ b/utils/classifications.py
 # -*- coding: utf-8 -*-
+
 # In[1]:
 import numpy as np
 from pyts.classification import BOSSVS

--- a/utils/representations.py
+++ b/utils/representations.py
 # -*- coding: utf-8 -*-
+
 # In[1]:
 import numpy as np
 import pandas as pd
@@ -49,6 +50,7 @@ class Gramian_transform(BaseEstimator, TransformerMixin):
        self.transformer = GramianAngularField(image_size=self.img_size,
                                               method=self.method,
                                               flatten=self.flatten)
+        self.transformer.fit(X)
        return self
    
 class Recurrence_transform(BaseEstimator, TransformerMixin):
@@ -77,9 +79,11 @@ class Recurrence_transform(BaseEstimator, TransformerMixin):
        self.approximator = PiecewiseAggregateApproximation(output_size=self.output_size,
                                                                  window_size=None, 
                                                                  overlapping=False)
+        self.approximator.fit(X)
        self.transformer = RecurrencePlot(dimension=self.dimension,
                                          time_delay=self.time_delay,
                                          flatten=self.flatten)
+        self.transformer.fit(X)
        return self
    
 class PiecewiseApproximation_transform(BaseEstimator, TransformerMixin):
@@ -101,6 +105,7 @@ class PiecewiseApproximation_transform(BaseEstimator, TransformerMixin):
        self.transformer = PiecewiseAggregateApproximation(output_size=self.output_size, 
                                                           window_size=self.window_size,
                                                           overlapping=self.overlapping)
+        self.transformer.fit(X)
        return self
        
 class SymbolicAggregate_transform(BaseEstimator, TransformerMixin):
@@ -119,6 +124,7 @@ class SymbolicAggregate_transform(BaseEstimator, TransformerMixin):
        self.transformer = SymbolicAggregateApproximation(n_bins=self.n_bins,
                                                          strategy=self.strategy,
                                                          alphabet=self.alphabet)
+        self.transformer.fit(X)
        return self
        
 class SymbolicFourrier_transform(BaseEstimator, TransformerMixin):
@@ -139,23 +145,24 @@ class SymbolicFourrier_transform(BaseEstimator, TransformerMixin):
        X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
        return X
    
-    def fit(self, X, y=None):
+    def fit(self, X, y):
        self.transformer = SymbolicFourierApproximation(n_coefs=self.n_coefs, n_bins=self.n_bins,
                                                        strategy=self.strategy, alphabet=self.alphabet,
                                                        drop_sum=self.drop_sum, anova=self.anova,
                                                        norm_mean=self.norm_mean, norm_std=self.norm_std)
+        X = X.reshape(X.shape[0],X.shape[1])
+        self.transformer.fit(X,y)
        return self
    
    
-class MatrixProfile_transform(BaseEstimator, TransformerMixin):
+class MatrixProfile_transform():
    def __init__(self, window_size=0.075):
-        self._window_size=window_size
-        
+        self.window_size = window_size
        
    def transform(self, X, y=None):
        if type(X[0]) == pd.core.series.Series:
            X = np.asarray([x.values for x in X])
-        X = np.asarray([mp.compute(x.reshape(-1),windows=x.shape[0]*self._window_size)['mp'].reshape(1,-1) for x in X])        
+        X = np.asarray([mp.compute(x.reshape(-1),windows=x.shape[0]*self.window_size)['mp'].reshape(1,-1) for x in X])        
        X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
        return X