Commit e3496cd6 authored by Antoine Guillaume's avatar Antoine Guillaume

Fix of some sklearn interfaces

parent d81da020
......@@ -31,8 +31,8 @@ from sklearn.preprocessing import MinMaxScaler
# In[3]:
#Base path, all necessary folders are supposed to be contained in this one.
#base_path = r"!! REPLACE BY YOUR PATH !!"
base_path = r"C:/Utilisateurs/A694772/Documents/ECMLPKDD_datacopy/"
base_path = r"!! REPLACE BY YOUR PATH !!"
#Path to the life cycles CSV files.
dataset_path = base_path+r"datasets/"
......@@ -40,8 +40,7 @@ dataset_path = base_path+r"datasets/"
result_path = base_path+r"results/"
#If not None, CSV files containing the data used by the TS-CHIEF Java program will be written to this path
#TSCHIEF_path = dataset_path+r"TSCHIEF/"
TSCHIEF_path = None
TSCHIEF_path = dataset_path+r"TSCHIEF/"
#If True, perform cross validation of all defined pipelines
do_cross_validation = True
......@@ -59,10 +58,10 @@ predictive_padding_hours = 48
extended_infected_interval_hours = 24
#Size of the PAA transform output
size=500
size=1000
#Number of cross validation splits
n_splits=2
n_splits=10
# Number of processes to launch in parallel for the cross validation of each pipeline.
# Set to None if you don't have the setup to allow such speedups.
......
......@@ -22,7 +22,7 @@ The experiment were conducted with python 3.8, the following packages are requir
* sktime
* pandas
If you wish to run ResNet for image classification, you will also need TensorFlow 2.x.
If you wish to run ResNet for image classification, you will also need TensorFlow 2.x, and sktime-dl for InceptionTime.
## How to get the ATM dataset
Because the ATM dataset is the property of equensWorldline, you must first send an email to "intellectual-property-team-worldline@worldline.com" and "antoine.guillaume@equensworldline.com" to request authorization.
......@@ -67,11 +67,11 @@ From the latest version available on github we applied the following modificatio
* Fix import error from sktime utils : In sktime_dl/utils/_data.py, replace :
```
from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested (_data.py line 6)
from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested
```
by
```
from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested (_data.py line 6)
from sktime.utils.data_container import tabularize, from_3d_numpy_to_nested
```
* We also modified InceptionTime to use binary_crossentropy (change the loss name and use a sigmoid layer with 1 neuron as the output) and weighted accuracy for early stopping. This is not mandatory, but it is better suited to our problem.
......
# -*- coding: utf-8 -*-
# In[1]:
import numpy as np
from pyts.classification import BOSSVS
......
# -*- coding: utf-8 -*-
# In[1]:
import numpy as np
import pandas as pd
......@@ -49,6 +50,7 @@ class Gramian_transform(BaseEstimator, TransformerMixin):
self.transformer = GramianAngularField(image_size=self.img_size,
method=self.method,
flatten=self.flatten)
self.transformer.fit(X)
return self
class Recurrence_transform(BaseEstimator, TransformerMixin):
......@@ -77,9 +79,11 @@ class Recurrence_transform(BaseEstimator, TransformerMixin):
self.approximator = PiecewiseAggregateApproximation(output_size=self.output_size,
window_size=None,
overlapping=False)
self.approximator.fit(X)
self.transformer = RecurrencePlot(dimension=self.dimension,
time_delay=self.time_delay,
flatten=self.flatten)
self.transformer.fit(X)
return self
class PiecewiseApproximation_transform(BaseEstimator, TransformerMixin):
......@@ -101,6 +105,7 @@ class PiecewiseApproximation_transform(BaseEstimator, TransformerMixin):
self.transformer = PiecewiseAggregateApproximation(output_size=self.output_size,
window_size=self.window_size,
overlapping=self.overlapping)
self.transformer.fit(X)
return self
class SymbolicAggregate_transform(BaseEstimator, TransformerMixin):
......@@ -119,6 +124,7 @@ class SymbolicAggregate_transform(BaseEstimator, TransformerMixin):
self.transformer = SymbolicAggregateApproximation(n_bins=self.n_bins,
strategy=self.strategy,
alphabet=self.alphabet)
self.transformer.fit(X)
return self
class SymbolicFourrier_transform(BaseEstimator, TransformerMixin):
......@@ -139,23 +145,24 @@ class SymbolicFourrier_transform(BaseEstimator, TransformerMixin):
X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
return X
def fit(self, X, y=None):
def fit(self, X, y):
self.transformer = SymbolicFourierApproximation(n_coefs=self.n_coefs, n_bins=self.n_bins,
strategy=self.strategy, alphabet=self.alphabet,
drop_sum=self.drop_sum, anova=self.anova,
norm_mean=self.norm_mean, norm_std=self.norm_std)
X = X.reshape(X.shape[0],X.shape[1])
self.transformer.fit(X,y)
return self
class MatrixProfile_transform(BaseEstimator, TransformerMixin):
class MatrixProfile_transform():
def __init__(self, window_size=0.075):
self._window_size=window_size
self.window_size = window_size
def transform(self, X, y=None):
if type(X[0]) == pd.core.series.Series:
X = np.asarray([x.values for x in X])
X = np.asarray([mp.compute(x.reshape(-1),windows=x.shape[0]*self._window_size)['mp'].reshape(1,-1) for x in X])
X = np.asarray([mp.compute(x.reshape(-1),windows=x.shape[0]*self.window_size)['mp'].reshape(1,-1) for x in X])
X = X.reshape(X.shape[0], X.shape[2], X.shape[1])
return X
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment