lifo / tsc-pdm-event-log · Commits · 0569d929

Commit 0569d929, authored Nov 20, 2020 by Antoine Guillaume

    Typos

parent 542bd35d
Changes: 1 changed file, with 14 additions and 16 deletions

CV_script.py (+14, -16)
@@ -31,7 +31,7 @@ from sklearn.preprocessing import MinMaxScaler
 # In[3]:
 #Base path, all necessary folders are supposed to be contained in this one.
-base_path = r"C:/Utilisateurs/A694772/Documents/ECMLPKDD_datacopy/"
+base_path = r"!! REPLACE BY YOUR PATH !!"
 #Path to the life cycles CSV files.
 dataset_path = base_path + r"datasets/"
@@ -135,7 +135,8 @@ life_cycles = np.asarray([process_cycle(file_name, dataset_path,
                                         extended_infected_interval_hours)
                          for file_name in file_list])
 print('\nData Loaded')
-# # Define data representation functions
+# # Define data encoding functions
 # In[5]:
@@ -155,7 +156,6 @@ def get_R1_dict(codes):
 def get_R2_dict(codes):
     return {x: int(x[2:]) for x in codes}

 def get_R3_dict(codes, spacing=200):
     OK_codes = ["GA01000", "GA02000", "GA03000", "GA04000", "GA05000", "GA06000",
                 "GA07000", "GA10000", "GA10011", "GA10015", "GA10020", "GA10024",
                 "GA10031", "GA10500", "GA11000", "GA11500", "GA12000",
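To illustrate the encoding (a minimal runnable sketch using the definition of get_R2_dict above; the sample codes are taken from the OK_codes list):

# get_R2_dict maps each event code to the integer after its 2-character
# prefix, e.g. "GA01000" -> 1000. Sample codes taken from OK_codes above.
def get_R2_dict(codes):
    return {x: int(x[2:]) for x in codes}

print(get_R2_dict(["GA01000", "GA02000", "GA10011"]))
# {'GA01000': 1000, 'GA02000': 2000, 'GA10011': 10011}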
@@ -217,7 +217,7 @@ def apply_code_dict(df, code_dic, code_column='cod_evt'):
 # In[11]:
 pipeline_dict = {}
-#FLATTENED IMAGE CLASSIFIERS
+#FLATTENED IMAGE
 pipeline_dict.update({"PAA Gramian Flat RF": make_pipeline(Gramian_transform(flatten=True),
                                                            Random_Forest())})
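This name-to-pipeline pattern repeats for every entry below. A self-contained sketch of the same pattern with stock scikit-learn estimators standing in for the script's custom transforms (Gramian_transform and Random_Forest are defined earlier in CV_script.py and not shown in this diff):

# Sketch of the pipeline_dict pattern with stock scikit-learn parts;
# the real script plugs in its own transforms (Gramian_transform, etc.).
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

pipeline_dict = {}
pipeline_dict.update({"Scaler RF": make_pipeline(MinMaxScaler(),
                                                 RandomForestClassifier())})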
@@ -301,7 +301,6 @@ pipeline_dict.update({"PAA SFA KNN":make_pipeline(PiecewiseApproximation_transfo
 #SFA uses the Fourier transform (DFT) followed by binning with MCB; the result of this operation is no longer in the time domain.
 #TIME SERIES CLASSIFIERS + PAA + MATRIX PROFILE
 pipeline_dict.update({"PAA MP TSRF": make_pipeline(PiecewiseApproximation_transform(output_size=size),
@@ -322,8 +321,7 @@ pipeline_dict.update({"PAA MP RISE":make_pipeline(PiecewiseApproximation_transfo
 #Rocket transform
 #PAA + ROCKET
 pipeline_dict.update({"PAA ROCKET RF": make_pipeline(PiecewiseApproximation_transform(output_size=size),
                                                      ROCKET_transform(flatten=True),
                                                      Random_Forest())})
@@ -346,6 +344,8 @@ pipeline_dict.update({"PAA ROCKET Ridge":make_pipeline(PiecewiseApproximation_tr
                      SelectFromModel(ExtraTreesClassifier(n_estimators=300, class_weight="balanced_subsample"),
                                      threshold=0.000001),
                      Ridge_classif())})
 #PAA + MATRIX PROFILE + ROCKET
 pipeline_dict.update({"PAA MP ROCKET RF": make_pipeline(PiecewiseApproximation_transform(output_size=size),
                                                         MatrixProfile_transform(),
                                                         ROCKET_transform(flatten=True),
@@ -374,6 +374,7 @@ pipeline_dict.update({"PAA MP ROCKET Ridge":make_pipeline(PiecewiseApproximation
                      SelectFromModel(ExtraTreesClassifier(n_estimators=300, class_weight="balanced_subsample"),
                                      threshold=0.000001),
                      Ridge_classif())})
 #PAA + SAX + ROCKET
 pipeline_dict.update({"PAA SAX ROCKET RF": make_pipeline(PiecewiseApproximation_transform(output_size=size),
                                                          SymbolicAggregate_transform(),
                                                          ROCKET_transform(flatten=True),
@@ -400,6 +401,9 @@ pipeline_dict.update({"PAA SAX ROCKET KNN":make_pipeline(PiecewiseApproximation_
                      SelectFromModel(ExtraTreesClassifier(n_estimators=300, class_weight="balanced_subsample"),
                                      threshold=0.000001),
                      KNN_classif())})
 #ROCKET on SFA is not efficient; ROCKET can already extract frequency-based features due to the nature of convolutional kernels.
 #PAA + MP + STACKED FLAT IMAGES
 pipeline_dict.update({"PAA MP Gramian + Recurrence RF": make_pipeline(PiecewiseApproximation_transform(output_size=size),
                                                                       MatrixProfile_transform(),
                                                                       FeatureUnion([
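To make the comment about convolutional kernels concrete, a numpy-only sketch of one ROCKET-style feature (random kernel convolution plus PPV pooling, after Dempster et al.; an illustration, not the script's ROCKET_transform):

# Sketch of one ROCKET-style feature: convolve with a random kernel and pool
# with PPV (proportion of positive values). Random kernels of varying length
# and dilation respond to specific frequencies, which is why re-encoding the
# series with SFA first adds little.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(128)                 # toy series
kernel = rng.standard_normal(9)              # random kernel weights
bias = rng.uniform(-1, 1)

conv = np.convolve(x, kernel, mode="valid") + bias
ppv = np.mean(conv > 0)                      # one pooled feature per kernel
print(ppv)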
@@ -436,7 +440,7 @@ pipeline_dict.update({"PAA Gramian + Recurrence RF":make_pipeline(PiecewiseAppro
                      ]),
                      Random_Forest())})
 #PAA + STACKED FLAT IMAGES
 pipeline_dict.update({"PAA Gramian + Recurrence SVM": make_pipeline(PiecewiseApproximation_transform(output_size=size),
                                                                     FeatureUnion([
                                                                         ("gramian", Gramian_transform(flatten=True)),
@@ -455,8 +459,6 @@ pipeline_dict.update({"PAA Gramian + Recurrence KNN":make_pipeline(PiecewiseAppr
                      SelectFromModel(ExtraTreesClassifier(n_estimators=300, class_weight="balanced_subsample"),
                                      threshold=0.000001),
                      KNN_classif())})
 #ROCKET on SFA is not efficient; ROCKET can already extract frequency-based features due to the nature of convolutional kernels.
 """
 #This section is left commented so you have no trouble running the script without Tensorflow/GPU
 #If you have errors during cross validation, you can try to make the class ResNetV2
@@ -472,11 +474,11 @@ pipeline_dict.update({"PAA Recurrence ResNet50V2":make_pipeline(Recurrence_trans
 """
-print('Pipelines initialised')
+print('Pipelines initialized')
 # In[12]:
-# Critical Failure index (CFI). As True Negatives implies that no maintenance is schedule (so no business impact),
+# Critical Failure Index (CFI). As True Negatives implies that no maintenance is scheduled (so no business impact),
 # this measure indicates how many maintenance operations we "missed" (False Negatives) plus how many we did
 # while it was not necessary to do so (False Positives). Then those two variables are summed and
 # divided by their sum plus the number of successful predictions (True Positives).
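From the comment above, CFI = (FN + FP) / (FN + FP + TP). A minimal sketch (the helper name cfi_score is hypothetical; the script's own implementation is not shown in this diff):

# Critical Failure Index as described above: missed maintenance (FN) plus
# unnecessary maintenance (FP), over everything except True Negatives.
# The name cfi_score is hypothetical, for illustration only.
from sklearn.metrics import confusion_matrix

def cfi_score(y_true, y_pred):
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    return (fn + fp) / (fn + fp + tp)

print(cfi_score([0, 1, 1, 0, 1], [0, 1, 0, 1, 1]))  # 0.5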
@@ -504,10 +506,6 @@ def report(pipeline, r, b_accs, cfis, f1s, decimals=3):
 df_res = pd.DataFrame(columns=['name', 'representation',
                                'balanced accuracy mean', 'CFI mean', 'F1 score mean',
                                'Fit time mean', 'Score time mean',
                                'balanced accuracy std', 'CFI std', 'F1 score std',
                                'Fit time std', 'Score time std'])
# In[14]:
print('Cross Validation')
order = {0: 'R1', 1: 'R2', 2: 'R3', 3: 'R4'}
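For context, the shape of the cross-validation driver this sets up (a hedged sketch: order and the name-to-pipeline mapping come from the script, while the data, pipeline, splitter, and scoring here are assumptions about code not shown in this diff):

# Hedged sketch of the cross-validation driver; the real script iterates the
# representations R1..R4 and its own pipelines. Data and pipeline here are
# placeholders.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
from sklearn.pipeline import make_pipeline

order = {0: 'R1', 1: 'R2', 2: 'R3', 3: 'R4'}
pipeline_dict = {"RF": make_pipeline(RandomForestClassifier())}
X, y = make_classification(n_samples=60, random_state=0)   # placeholder data

for r_idx, r_name in order.items():
    for name, pipeline in pipeline_dict.items():
        scores = cross_validate(pipeline, X, y, cv=3,
                                scoring="balanced_accuracy")
        print(r_name, name, scores["test_score"].mean().round(3))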